jj_lib/
conflicts.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::io;
18use std::io::Read;
19use std::io::Write;
20use std::iter::zip;
21
22use bstr::BString;
23use bstr::ByteSlice as _;
24use futures::stream::BoxStream;
25use futures::try_join;
26use futures::Stream;
27use futures::StreamExt as _;
28use itertools::Itertools as _;
29use pollster::FutureExt as _;
30
31use crate::backend::BackendError;
32use crate::backend::BackendResult;
33use crate::backend::CommitId;
34use crate::backend::FileId;
35use crate::backend::SymlinkId;
36use crate::backend::TreeId;
37use crate::backend::TreeValue;
38use crate::copies::CopiesTreeDiffEntry;
39use crate::copies::CopiesTreeDiffEntryPath;
40use crate::diff::Diff;
41use crate::diff::DiffHunk;
42use crate::diff::DiffHunkKind;
43use crate::files;
44use crate::files::MergeResult;
45use crate::merge::Merge;
46use crate::merge::MergedTreeValue;
47use crate::repo_path::RepoPath;
48use crate::store::Store;
49
50/// Minimum length of conflict markers.
51pub const MIN_CONFLICT_MARKER_LEN: usize = 7;
52
53/// If a file already contains lines which look like conflict markers of length
54/// N, then the conflict markers we add will be of length (N + increment). This
55/// number is chosen to make the conflict markers noticeably longer than the
56/// existing markers.
57const CONFLICT_MARKER_LEN_INCREMENT: usize = 4;
58
59/// Comment for missing terminating newline in a term of a conflict.
60const NO_EOL_COMMENT: &str = " (no terminating newline)";
61
62/// Comment for missing terminating newline in the "add" side of a diff.
63const ADD_NO_EOL_COMMENT: &str = " (removes terminating newline)";
64
65/// Comment for missing terminating newline in the "remove" side of a diff.
66const REMOVE_NO_EOL_COMMENT: &str = " (adds terminating newline)";
67
68fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> {
69    for hunk in hunks {
70        match hunk.kind {
71            DiffHunkKind::Matching => {
72                debug_assert!(hunk.contents.iter().all_equal());
73                for line in hunk.contents[0].lines_with_terminator() {
74                    file.write_all(b" ")?;
75                    write_and_ensure_newline(file, line)?;
76                }
77            }
78            DiffHunkKind::Different => {
79                for line in hunk.contents[0].lines_with_terminator() {
80                    file.write_all(b"-")?;
81                    write_and_ensure_newline(file, line)?;
82                }
83                for line in hunk.contents[1].lines_with_terminator() {
84                    file.write_all(b"+")?;
85                    write_and_ensure_newline(file, line)?;
86                }
87            }
88        }
89    }
90    Ok(())
91}
92
93async fn get_file_contents(
94    store: &Store,
95    path: &RepoPath,
96    term: &Option<FileId>,
97) -> BackendResult<BString> {
98    match term {
99        Some(id) => {
100            let mut content = vec![];
101            store
102                .read_file_async(path, id)
103                .await?
104                .read_to_end(&mut content)
105                .map_err(|err| BackendError::ReadFile {
106                    path: path.to_owned(),
107                    id: id.clone(),
108                    source: err.into(),
109                })?;
110            Ok(BString::new(content))
111        }
112        // If the conflict had removed the file on one side, we pretend that the file
113        // was empty there.
114        None => Ok(BString::new(vec![])),
115    }
116}
117
118pub async fn extract_as_single_hunk(
119    merge: &Merge<Option<FileId>>,
120    store: &Store,
121    path: &RepoPath,
122) -> BackendResult<Merge<BString>> {
123    merge
124        .try_map_async(|term| get_file_contents(store, path, term))
125        .await
126}
127
128/// A type similar to `MergedTreeValue` but with associated data to include in
129/// e.g. the working copy or in a diff.
130pub enum MaterializedTreeValue {
131    Absent,
132    AccessDenied(Box<dyn std::error::Error + Send + Sync>),
133    File(MaterializedFileValue),
134    Symlink { id: SymlinkId, target: String },
135    FileConflict(MaterializedFileConflictValue),
136    OtherConflict { id: MergedTreeValue },
137    GitSubmodule(CommitId),
138    Tree(TreeId),
139}
140
141impl MaterializedTreeValue {
142    pub fn is_absent(&self) -> bool {
143        matches!(self, MaterializedTreeValue::Absent)
144    }
145
146    pub fn is_present(&self) -> bool {
147        !self.is_absent()
148    }
149}
150
151/// [`TreeValue::File`] with file content `reader`.
152pub struct MaterializedFileValue {
153    pub id: FileId,
154    pub executable: bool,
155    pub reader: Box<dyn Read>,
156}
157
158impl MaterializedFileValue {
159    /// Reads file content until EOF. The provided `path` is used only for error
160    /// reporting purpose.
161    pub fn read_all(&mut self, path: &RepoPath) -> BackendResult<Vec<u8>> {
162        let mut buf = Vec::new();
163        self.reader
164            .read_to_end(&mut buf)
165            .map_err(|err| BackendError::ReadFile {
166                path: path.to_owned(),
167                id: self.id.clone(),
168                source: err.into(),
169            })?;
170        Ok(buf)
171    }
172}
173
174/// Conflicted [`TreeValue::File`]s with file contents.
175pub struct MaterializedFileConflictValue {
176    /// File ids which preserve the shape of the tree conflict, to be used with
177    /// [`Merge::update_from_simplified()`].
178    pub unsimplified_ids: Merge<Option<FileId>>,
179    /// Simplified file ids, in which redundant id pairs are dropped.
180    pub ids: Merge<Option<FileId>>,
181    /// File contents corresponding to the simplified `ids`.
182    // TODO: or Vec<(FileId, Box<dyn Read>)> so that caller can stop reading
183    // when null bytes found?
184    pub contents: Merge<BString>,
185    /// Merged executable bit. `None` if there are changes in both executable
186    /// bit and file absence.
187    pub executable: Option<bool>,
188}
189
190/// Reads the data associated with a `MergedTreeValue` so it can be written to
191/// e.g. the working copy or diff.
192pub async fn materialize_tree_value(
193    store: &Store,
194    path: &RepoPath,
195    value: MergedTreeValue,
196) -> BackendResult<MaterializedTreeValue> {
197    match materialize_tree_value_no_access_denied(store, path, value).await {
198        Err(BackendError::ReadAccessDenied { source, .. }) => {
199            Ok(MaterializedTreeValue::AccessDenied(source))
200        }
201        result => result,
202    }
203}
204
205async fn materialize_tree_value_no_access_denied(
206    store: &Store,
207    path: &RepoPath,
208    value: MergedTreeValue,
209) -> BackendResult<MaterializedTreeValue> {
210    match value.into_resolved() {
211        Ok(None) => Ok(MaterializedTreeValue::Absent),
212        Ok(Some(TreeValue::File { id, executable })) => {
213            let reader = store.read_file_async(path, &id).await?;
214            Ok(MaterializedTreeValue::File(MaterializedFileValue {
215                id,
216                executable,
217                reader,
218            }))
219        }
220        Ok(Some(TreeValue::Symlink(id))) => {
221            let target = store.read_symlink_async(path, &id).await?;
222            Ok(MaterializedTreeValue::Symlink { id, target })
223        }
224        Ok(Some(TreeValue::GitSubmodule(id))) => Ok(MaterializedTreeValue::GitSubmodule(id)),
225        Ok(Some(TreeValue::Tree(id))) => Ok(MaterializedTreeValue::Tree(id)),
226        Ok(Some(TreeValue::Conflict(_))) => {
227            panic!("cannot materialize legacy conflict object at path {path:?}");
228        }
229        Err(conflict) => match try_materialize_file_conflict_value(store, path, &conflict).await? {
230            Some(file) => Ok(MaterializedTreeValue::FileConflict(file)),
231            None => Ok(MaterializedTreeValue::OtherConflict { id: conflict }),
232        },
233    }
234}
235
236/// Suppose `conflict` contains only files or absent entries, reads the file
237/// contents.
238pub async fn try_materialize_file_conflict_value(
239    store: &Store,
240    path: &RepoPath,
241    conflict: &MergedTreeValue,
242) -> BackendResult<Option<MaterializedFileConflictValue>> {
243    let (Some(unsimplified_ids), Some(executable_bits)) =
244        (conflict.to_file_merge(), conflict.to_executable_merge())
245    else {
246        return Ok(None);
247    };
248    let ids = unsimplified_ids.simplify();
249    let contents = extract_as_single_hunk(&ids, store, path).await?;
250    let executable = resolve_file_executable(&executable_bits);
251    Ok(Some(MaterializedFileConflictValue {
252        unsimplified_ids,
253        ids,
254        contents,
255        executable,
256    }))
257}
258
259/// Resolves conflicts in file executable bit, returns the original state if the
260/// file is deleted and executable bit is unchanged.
261pub fn resolve_file_executable(merge: &Merge<Option<bool>>) -> Option<bool> {
262    let resolved = merge.resolve_trivial().copied()?;
263    if resolved.is_some() {
264        resolved
265    } else {
266        // If the merge is resolved to None (absent), there should be the same
267        // number of Some(true) and Some(false). Pick the old state if
268        // unambiguous, so the new file inherits the original executable bit.
269        merge.removes().flatten().copied().all_equal_value().ok()
270    }
271}
272
273/// Describes what style should be used when materializing conflicts.
274#[derive(Clone, Copy, PartialEq, Eq, Debug, Default, serde::Deserialize)]
275#[serde(rename_all = "kebab-case")]
276pub enum ConflictMarkerStyle {
277    /// Style which shows a snapshot and a series of diffs to apply.
278    #[default]
279    Diff,
280    /// Style which shows a snapshot for each base and side.
281    Snapshot,
282    /// Style which replicates Git's "diff3" style to support external tools.
283    Git,
284}
285
286/// Characters which can be repeated to form a conflict marker line when
287/// materializing and parsing conflicts.
288#[derive(Clone, Copy, PartialEq, Eq)]
289#[repr(u8)]
290enum ConflictMarkerLineChar {
291    ConflictStart = b'<',
292    ConflictEnd = b'>',
293    Add = b'+',
294    Remove = b'-',
295    Diff = b'%',
296    GitAncestor = b'|',
297    GitSeparator = b'=',
298}
299
300impl ConflictMarkerLineChar {
301    /// Get the ASCII byte used for this conflict marker.
302    fn to_byte(self) -> u8 {
303        self as u8
304    }
305
306    /// Parse a byte to see if it corresponds with any kind of conflict marker.
307    fn parse_byte(byte: u8) -> Option<Self> {
308        match byte {
309            b'<' => Some(Self::ConflictStart),
310            b'>' => Some(Self::ConflictEnd),
311            b'+' => Some(Self::Add),
312            b'-' => Some(Self::Remove),
313            b'%' => Some(Self::Diff),
314            b'|' => Some(Self::GitAncestor),
315            b'=' => Some(Self::GitSeparator),
316            _ => None,
317        }
318    }
319}
320
321/// Represents a conflict marker line parsed from the file. Conflict marker
322/// lines consist of a single ASCII character repeated for a certain length.
323struct ConflictMarkerLine {
324    kind: ConflictMarkerLineChar,
325    len: usize,
326}
327
328/// Write a conflict marker to an output file.
329fn write_conflict_marker(
330    output: &mut dyn Write,
331    kind: ConflictMarkerLineChar,
332    len: usize,
333    suffix_text: &str,
334) -> io::Result<()> {
335    let conflict_marker = BString::new(vec![kind.to_byte(); len]);
336
337    if suffix_text.is_empty() {
338        writeln!(output, "{conflict_marker}")
339    } else {
340        writeln!(output, "{conflict_marker} {suffix_text}")
341    }
342}
343
344/// Parse a conflict marker from a line of a file. The conflict marker may have
345/// any length (even less than MIN_CONFLICT_MARKER_LEN).
346fn parse_conflict_marker_any_len(line: &[u8]) -> Option<ConflictMarkerLine> {
347    let first_byte = *line.first()?;
348    let kind = ConflictMarkerLineChar::parse_byte(first_byte)?;
349    let len = line.iter().take_while(|&&b| b == first_byte).count();
350
351    if let Some(next_byte) = line.get(len) {
352        // If there is a character after the marker, it must be ASCII whitespace
353        if !next_byte.is_ascii_whitespace() {
354            return None;
355        }
356    }
357
358    Some(ConflictMarkerLine { kind, len })
359}
360
361/// Parse a conflict marker, expecting it to be at least a certain length. Any
362/// shorter conflict markers are ignored.
363fn parse_conflict_marker(line: &[u8], expected_len: usize) -> Option<ConflictMarkerLineChar> {
364    parse_conflict_marker_any_len(line)
365        .filter(|marker| marker.len >= expected_len)
366        .map(|marker| marker.kind)
367}
368
369/// Given a Merge of files, choose the conflict marker length to use when
370/// materializing conflicts.
371pub fn choose_materialized_conflict_marker_len<T: AsRef<[u8]>>(single_hunk: &Merge<T>) -> usize {
372    let max_existing_marker_len = single_hunk
373        .iter()
374        .flat_map(|file| file.as_ref().lines_with_terminator())
375        .filter_map(parse_conflict_marker_any_len)
376        .map(|marker| marker.len)
377        .max()
378        .unwrap_or_default();
379
380    max_existing_marker_len
381        .saturating_add(CONFLICT_MARKER_LEN_INCREMENT)
382        .max(MIN_CONFLICT_MARKER_LEN)
383}
384
385pub fn materialize_merge_result<T: AsRef<[u8]>>(
386    single_hunk: &Merge<T>,
387    conflict_marker_style: ConflictMarkerStyle,
388    output: &mut dyn Write,
389) -> io::Result<()> {
390    let merge_result = files::merge_hunks(single_hunk);
391    match &merge_result {
392        MergeResult::Resolved(content) => output.write_all(content),
393        MergeResult::Conflict(hunks) => {
394            let conflict_marker_len = choose_materialized_conflict_marker_len(single_hunk);
395            materialize_conflict_hunks(hunks, conflict_marker_style, conflict_marker_len, output)
396        }
397    }
398}
399
400pub fn materialize_merge_result_with_marker_len<T: AsRef<[u8]>>(
401    single_hunk: &Merge<T>,
402    conflict_marker_style: ConflictMarkerStyle,
403    conflict_marker_len: usize,
404    output: &mut dyn Write,
405) -> io::Result<()> {
406    let merge_result = files::merge_hunks(single_hunk);
407    match &merge_result {
408        MergeResult::Resolved(content) => output.write_all(content),
409        MergeResult::Conflict(hunks) => {
410            materialize_conflict_hunks(hunks, conflict_marker_style, conflict_marker_len, output)
411        }
412    }
413}
414
415pub fn materialize_merge_result_to_bytes<T: AsRef<[u8]>>(
416    single_hunk: &Merge<T>,
417    conflict_marker_style: ConflictMarkerStyle,
418) -> BString {
419    let merge_result = files::merge_hunks(single_hunk);
420    match merge_result {
421        MergeResult::Resolved(content) => content,
422        MergeResult::Conflict(hunks) => {
423            let conflict_marker_len = choose_materialized_conflict_marker_len(single_hunk);
424            let mut output = Vec::new();
425            materialize_conflict_hunks(
426                &hunks,
427                conflict_marker_style,
428                conflict_marker_len,
429                &mut output,
430            )
431            .expect("writing to an in-memory buffer should never fail");
432            output.into()
433        }
434    }
435}
436
437pub fn materialize_merge_result_to_bytes_with_marker_len<T: AsRef<[u8]>>(
438    single_hunk: &Merge<T>,
439    conflict_marker_style: ConflictMarkerStyle,
440    conflict_marker_len: usize,
441) -> BString {
442    let merge_result = files::merge_hunks(single_hunk);
443    match merge_result {
444        MergeResult::Resolved(content) => content,
445        MergeResult::Conflict(hunks) => {
446            let mut output = Vec::new();
447            materialize_conflict_hunks(
448                &hunks,
449                conflict_marker_style,
450                conflict_marker_len,
451                &mut output,
452            )
453            .expect("writing to an in-memory buffer should never fail");
454            output.into()
455        }
456    }
457}
458
459fn materialize_conflict_hunks(
460    hunks: &[Merge<BString>],
461    conflict_marker_style: ConflictMarkerStyle,
462    conflict_marker_len: usize,
463    output: &mut dyn Write,
464) -> io::Result<()> {
465    let num_conflicts = hunks
466        .iter()
467        .filter(|hunk| hunk.as_resolved().is_none())
468        .count();
469    let mut conflict_index = 0;
470    for hunk in hunks {
471        if let Some(content) = hunk.as_resolved() {
472            output.write_all(content)?;
473        } else {
474            conflict_index += 1;
475            let conflict_info = format!("Conflict {conflict_index} of {num_conflicts}");
476
477            match (conflict_marker_style, hunk.as_slice()) {
478                // 2-sided conflicts can use Git-style conflict markers
479                (ConflictMarkerStyle::Git, [left, base, right]) => {
480                    materialize_git_style_conflict(
481                        left,
482                        base,
483                        right,
484                        &conflict_info,
485                        conflict_marker_len,
486                        output,
487                    )?;
488                }
489                _ => {
490                    materialize_jj_style_conflict(
491                        hunk,
492                        &conflict_info,
493                        conflict_marker_style,
494                        conflict_marker_len,
495                        output,
496                    )?;
497                }
498            }
499        }
500    }
501    Ok(())
502}
503
504fn materialize_git_style_conflict(
505    left: &[u8],
506    base: &[u8],
507    right: &[u8],
508    conflict_info: &str,
509    conflict_marker_len: usize,
510    output: &mut dyn Write,
511) -> io::Result<()> {
512    write_conflict_marker(
513        output,
514        ConflictMarkerLineChar::ConflictStart,
515        conflict_marker_len,
516        &format!("Side #1 ({conflict_info})"),
517    )?;
518    write_and_ensure_newline(output, left)?;
519
520    write_conflict_marker(
521        output,
522        ConflictMarkerLineChar::GitAncestor,
523        conflict_marker_len,
524        "Base",
525    )?;
526    write_and_ensure_newline(output, base)?;
527
528    // VS Code doesn't seem to support any trailing text on the separator line
529    write_conflict_marker(
530        output,
531        ConflictMarkerLineChar::GitSeparator,
532        conflict_marker_len,
533        "",
534    )?;
535
536    write_and_ensure_newline(output, right)?;
537    write_conflict_marker(
538        output,
539        ConflictMarkerLineChar::ConflictEnd,
540        conflict_marker_len,
541        &format!("Side #2 ({conflict_info} ends)"),
542    )?;
543
544    Ok(())
545}
546
547fn materialize_jj_style_conflict(
548    hunk: &Merge<BString>,
549    conflict_info: &str,
550    conflict_marker_style: ConflictMarkerStyle,
551    conflict_marker_len: usize,
552    output: &mut dyn Write,
553) -> io::Result<()> {
554    // Write a positive snapshot (side) of a conflict
555    let write_side = |add_index: usize, data: &[u8], output: &mut dyn Write| {
556        write_conflict_marker(
557            output,
558            ConflictMarkerLineChar::Add,
559            conflict_marker_len,
560            &format!(
561                "Contents of side #{}{}",
562                add_index + 1,
563                maybe_no_eol_comment(data)
564            ),
565        )?;
566        write_and_ensure_newline(output, data)
567    };
568
569    // Write a negative snapshot (base) of a conflict
570    let write_base = |base_str: &str, data: &[u8], output: &mut dyn Write| {
571        write_conflict_marker(
572            output,
573            ConflictMarkerLineChar::Remove,
574            conflict_marker_len,
575            &format!("Contents of {base_str}{}", maybe_no_eol_comment(data)),
576        )?;
577        write_and_ensure_newline(output, data)
578    };
579
580    // Write a diff from a negative term to a positive term
581    let write_diff =
582        |base_str: &str, add_index: usize, diff: &[DiffHunk], output: &mut dyn Write| {
583            let no_eol_remove = diff
584                .last()
585                .is_some_and(|diff_hunk| has_no_eol(diff_hunk.contents[0]));
586            let no_eol_add = diff
587                .last()
588                .is_some_and(|diff_hunk| has_no_eol(diff_hunk.contents[1]));
589            let no_eol_comment = match (no_eol_remove, no_eol_add) {
590                (true, true) => NO_EOL_COMMENT,
591                (true, _) => REMOVE_NO_EOL_COMMENT,
592                (_, true) => ADD_NO_EOL_COMMENT,
593                _ => "",
594            };
595            write_conflict_marker(
596                output,
597                ConflictMarkerLineChar::Diff,
598                conflict_marker_len,
599                &format!(
600                    "Changes from {base_str} to side #{}{no_eol_comment}",
601                    add_index + 1
602                ),
603            )?;
604            write_diff_hunks(diff, output)
605        };
606
607    write_conflict_marker(
608        output,
609        ConflictMarkerLineChar::ConflictStart,
610        conflict_marker_len,
611        conflict_info,
612    )?;
613    let mut add_index = 0;
614    for (base_index, left) in hunk.removes().enumerate() {
615        // The vast majority of conflicts one actually tries to resolve manually have 1
616        // base.
617        let base_str = if hunk.removes().len() == 1 {
618            "base".to_string()
619        } else {
620            format!("base #{}", base_index + 1)
621        };
622
623        let Some(right1) = hunk.get_add(add_index) else {
624            // If we have no more positive terms, emit the remaining negative terms as
625            // snapshots.
626            write_base(&base_str, left, output)?;
627            continue;
628        };
629
630        // For any style other than "diff", always emit sides and bases separately
631        if conflict_marker_style != ConflictMarkerStyle::Diff {
632            write_side(add_index, right1, output)?;
633            write_base(&base_str, left, output)?;
634            add_index += 1;
635            continue;
636        }
637
638        let diff1 = Diff::by_line([&left, &right1]).hunks().collect_vec();
639        // Check if the diff against the next positive term is better. Since we want to
640        // preserve the order of the terms, we don't match against any later positive
641        // terms.
642        if let Some(right2) = hunk.get_add(add_index + 1) {
643            let diff2 = Diff::by_line([&left, &right2]).hunks().collect_vec();
644            if diff_size(&diff2) < diff_size(&diff1) {
645                // If the next positive term is a better match, emit the current positive term
646                // as a snapshot and the next positive term as a diff.
647                write_side(add_index, right1, output)?;
648                write_diff(&base_str, add_index + 1, &diff2, output)?;
649                add_index += 2;
650                continue;
651            }
652        }
653
654        write_diff(&base_str, add_index, &diff1, output)?;
655        add_index += 1;
656    }
657
658    // Emit the remaining positive terms as snapshots.
659    for (add_index, slice) in hunk.adds().enumerate().skip(add_index) {
660        write_side(add_index, slice, output)?;
661    }
662    write_conflict_marker(
663        output,
664        ConflictMarkerLineChar::ConflictEnd,
665        conflict_marker_len,
666        &format!("{conflict_info} ends"),
667    )?;
668    Ok(())
669}
670
671fn maybe_no_eol_comment(slice: &[u8]) -> &'static str {
672    if has_no_eol(slice) {
673        NO_EOL_COMMENT
674    } else {
675        ""
676    }
677}
678
679// Write a chunk of data, ensuring that it doesn't end with a line which is
680// missing its terminating newline.
681fn write_and_ensure_newline(output: &mut dyn Write, data: &[u8]) -> io::Result<()> {
682    output.write_all(data)?;
683    if has_no_eol(data) {
684        writeln!(output)?;
685    }
686    Ok(())
687}
688
689// Check whether a slice is missing its terminating newline character.
690fn has_no_eol(slice: &[u8]) -> bool {
691    slice.last().is_some_and(|&last| last != b'\n')
692}
693
694fn diff_size(hunks: &[DiffHunk]) -> usize {
695    hunks
696        .iter()
697        .map(|hunk| match hunk.kind {
698            DiffHunkKind::Matching => 0,
699            DiffHunkKind::Different => hunk.contents.iter().map(|content| content.len()).sum(),
700        })
701        .sum()
702}
703
704pub struct MaterializedTreeDiffEntry {
705    pub path: CopiesTreeDiffEntryPath,
706    pub values: BackendResult<(MaterializedTreeValue, MaterializedTreeValue)>,
707}
708
709pub fn materialized_diff_stream<'a>(
710    store: &'a Store,
711    tree_diff: BoxStream<'a, CopiesTreeDiffEntry>,
712) -> impl Stream<Item = MaterializedTreeDiffEntry> + use<'a> {
713    tree_diff
714        .map(|CopiesTreeDiffEntry { path, values }| async {
715            match values {
716                Err(err) => MaterializedTreeDiffEntry {
717                    path,
718                    values: Err(err),
719                },
720                Ok((before, after)) => {
721                    let before_future = materialize_tree_value(store, path.source(), before);
722                    let after_future = materialize_tree_value(store, path.target(), after);
723                    let values = try_join!(before_future, after_future);
724                    MaterializedTreeDiffEntry { path, values }
725                }
726            }
727        })
728        .buffered((store.concurrency() / 2).max(1))
729}
730
731/// Parses conflict markers from a slice.
732///
733/// Returns `None` if there were no valid conflict markers. The caller
734/// has to provide the expected number of merge sides (adds). Conflict
735/// markers that are otherwise valid will be considered invalid if
736/// they don't have the expected arity.
737///
738/// All conflict markers in the file must be at least as long as the expected
739/// length. Any shorter conflict markers will be ignored.
740// TODO: "parse" is not usually the opposite of "materialize", so maybe we
741// should rename them to "serialize" and "deserialize"?
742pub fn parse_conflict(
743    input: &[u8],
744    num_sides: usize,
745    expected_marker_len: usize,
746) -> Option<Vec<Merge<BString>>> {
747    if input.is_empty() {
748        return None;
749    }
750    let mut hunks = vec![];
751    let mut pos = 0;
752    let mut resolved_start = 0;
753    let mut conflict_start = None;
754    let mut conflict_start_len = 0;
755    for line in input.lines_with_terminator() {
756        match parse_conflict_marker(line, expected_marker_len) {
757            Some(ConflictMarkerLineChar::ConflictStart) => {
758                conflict_start = Some(pos);
759                conflict_start_len = line.len();
760            }
761            Some(ConflictMarkerLineChar::ConflictEnd) => {
762                if let Some(conflict_start_index) = conflict_start.take() {
763                    let conflict_body = &input[conflict_start_index + conflict_start_len..pos];
764                    let hunk = parse_conflict_hunk(conflict_body, expected_marker_len);
765                    if hunk.num_sides() == num_sides {
766                        let resolved_slice = &input[resolved_start..conflict_start_index];
767                        if !resolved_slice.is_empty() {
768                            hunks.push(Merge::resolved(BString::from(resolved_slice)));
769                        }
770                        hunks.push(hunk);
771                        resolved_start = pos + line.len();
772                    }
773                }
774            }
775            _ => {}
776        }
777        pos += line.len();
778    }
779
780    if hunks.is_empty() {
781        None
782    } else {
783        if resolved_start < input.len() {
784            hunks.push(Merge::resolved(BString::from(&input[resolved_start..])));
785        }
786        Some(hunks)
787    }
788}
789
790/// This method handles parsing both JJ-style and Git-style conflict markers,
791/// meaning that switching conflict marker styles won't prevent existing files
792/// with other conflict marker styles from being parsed successfully. The
793/// conflict marker style to use for parsing is determined based on the first
794/// line of the hunk.
795fn parse_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
796    // If the hunk starts with a conflict marker, find its first character
797    let initial_conflict_marker = input
798        .lines_with_terminator()
799        .next()
800        .and_then(|line| parse_conflict_marker(line, expected_marker_len));
801
802    match initial_conflict_marker {
803        // JJ-style conflicts must start with one of these 3 conflict marker lines
804        Some(
805            ConflictMarkerLineChar::Diff
806            | ConflictMarkerLineChar::Remove
807            | ConflictMarkerLineChar::Add,
808        ) => parse_jj_style_conflict_hunk(input, expected_marker_len),
809        // Git-style conflicts either must not start with a conflict marker line, or must start with
810        // the "|||||||" conflict marker line (if the first side was empty)
811        None | Some(ConflictMarkerLineChar::GitAncestor) => {
812            parse_git_style_conflict_hunk(input, expected_marker_len)
813        }
814        // No other conflict markers are allowed at the start of a hunk
815        Some(_) => Merge::resolved(BString::new(vec![])),
816    }
817}
818
819fn parse_jj_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
820    enum State {
821        Diff,
822        Remove,
823        Add,
824        Unknown,
825    }
826    let mut state = State::Unknown;
827    let mut removes = vec![];
828    let mut adds = vec![];
829    for line in input.lines_with_terminator() {
830        match parse_conflict_marker(line, expected_marker_len) {
831            Some(ConflictMarkerLineChar::Diff) => {
832                state = State::Diff;
833                removes.push(BString::new(vec![]));
834                adds.push(BString::new(vec![]));
835                continue;
836            }
837            Some(ConflictMarkerLineChar::Remove) => {
838                state = State::Remove;
839                removes.push(BString::new(vec![]));
840                continue;
841            }
842            Some(ConflictMarkerLineChar::Add) => {
843                state = State::Add;
844                adds.push(BString::new(vec![]));
845                continue;
846            }
847            _ => {}
848        }
849        match state {
850            State::Diff => {
851                if let Some(rest) = line.strip_prefix(b"-") {
852                    removes.last_mut().unwrap().extend_from_slice(rest);
853                } else if let Some(rest) = line.strip_prefix(b"+") {
854                    adds.last_mut().unwrap().extend_from_slice(rest);
855                } else if let Some(rest) = line.strip_prefix(b" ") {
856                    removes.last_mut().unwrap().extend_from_slice(rest);
857                    adds.last_mut().unwrap().extend_from_slice(rest);
858                } else if line == b"\n" || line == b"\r\n" {
859                    // Some editors strip trailing whitespace, so " \n" might become "\n". It would
860                    // be unfortunate if this prevented the conflict from being parsed, so we add
861                    // the empty line to the "remove" and "add" as if there was a space in front
862                    removes.last_mut().unwrap().extend_from_slice(line);
863                    adds.last_mut().unwrap().extend_from_slice(line);
864                } else {
865                    // Doesn't look like a valid conflict
866                    return Merge::resolved(BString::new(vec![]));
867                }
868            }
869            State::Remove => {
870                removes.last_mut().unwrap().extend_from_slice(line);
871            }
872            State::Add => {
873                adds.last_mut().unwrap().extend_from_slice(line);
874            }
875            State::Unknown => {
876                // Doesn't look like a valid conflict
877                return Merge::resolved(BString::new(vec![]));
878            }
879        }
880    }
881
882    if adds.len() == removes.len() + 1 {
883        Merge::from_removes_adds(removes, adds)
884    } else {
885        // Doesn't look like a valid conflict
886        Merge::resolved(BString::new(vec![]))
887    }
888}
889
890fn parse_git_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
891    #[derive(PartialEq, Eq)]
892    enum State {
893        Left,
894        Base,
895        Right,
896    }
897    let mut state = State::Left;
898    let mut left = BString::new(vec![]);
899    let mut base = BString::new(vec![]);
900    let mut right = BString::new(vec![]);
901    for line in input.lines_with_terminator() {
902        match parse_conflict_marker(line, expected_marker_len) {
903            Some(ConflictMarkerLineChar::GitAncestor) => {
904                if state == State::Left {
905                    state = State::Base;
906                    continue;
907                } else {
908                    // Base must come after left
909                    return Merge::resolved(BString::new(vec![]));
910                }
911            }
912            Some(ConflictMarkerLineChar::GitSeparator) => {
913                if state == State::Base {
914                    state = State::Right;
915                    continue;
916                } else {
917                    // Right must come after base
918                    return Merge::resolved(BString::new(vec![]));
919                }
920            }
921            _ => {}
922        }
923        match state {
924            State::Left => left.extend_from_slice(line),
925            State::Base => base.extend_from_slice(line),
926            State::Right => right.extend_from_slice(line),
927        }
928    }
929
930    if state == State::Right {
931        Merge::from_vec(vec![left, base, right])
932    } else {
933        // Doesn't look like a valid conflict
934        Merge::resolved(BString::new(vec![]))
935    }
936}
937
938/// Parses conflict markers in `content` and returns an updated version of
939/// `file_ids` with the new contents. If no (valid) conflict markers remain, a
940/// single resolves `FileId` will be returned.
941pub async fn update_from_content(
942    file_ids: &Merge<Option<FileId>>,
943    store: &Store,
944    path: &RepoPath,
945    content: &[u8],
946    conflict_marker_style: ConflictMarkerStyle,
947    conflict_marker_len: usize,
948) -> BackendResult<Merge<Option<FileId>>> {
949    let simplified_file_ids = file_ids.simplify();
950
951    // First check if the new content is unchanged compared to the old content. If
952    // it is, we don't need parse the content or write any new objects to the
953    // store. This is also a way of making sure that unchanged tree/file
954    // conflicts (for example) are not converted to regular files in the working
955    // copy.
956    let mut old_content = Vec::with_capacity(content.len());
957    let merge_hunk = extract_as_single_hunk(&simplified_file_ids, store, path).await?;
958    materialize_merge_result_with_marker_len(
959        &merge_hunk,
960        conflict_marker_style,
961        conflict_marker_len,
962        &mut old_content,
963    )
964    .unwrap();
965    if content == old_content {
966        return Ok(file_ids.clone());
967    }
968
969    // Parse conflicts from the new content using the arity of the simplified
970    // conflicts.
971    let Some(mut hunks) = parse_conflict(
972        content,
973        simplified_file_ids.num_sides(),
974        conflict_marker_len,
975    ) else {
976        // Either there are no markers or they don't have the expected arity
977        let file_id = store.write_file(path, &mut &content[..]).await?;
978        return Ok(Merge::normal(file_id));
979    };
980
981    // If there is a conflict at the end of the file and a term ends with a newline,
982    // check whether the original term ended with a newline. If it didn't, then
983    // remove the newline since it was added automatically when materializing.
984    if let Some(last_hunk) = hunks.last_mut().filter(|hunk| !hunk.is_resolved()) {
985        for (original_content, term) in merge_hunk.iter().zip_eq(last_hunk.iter_mut()) {
986            if term.last() == Some(&b'\n') && has_no_eol(original_content) {
987                term.pop();
988            }
989        }
990    }
991
992    let mut contents = simplified_file_ids.map(|_| vec![]);
993    for hunk in hunks {
994        if let Some(slice) = hunk.as_resolved() {
995            for content in contents.iter_mut() {
996                content.extend_from_slice(slice);
997            }
998        } else {
999            for (content, slice) in zip(contents.iter_mut(), hunk.into_iter()) {
1000                content.extend(Vec::from(slice));
1001            }
1002        }
1003    }
1004
1005    // If the user edited the empty placeholder for an absent side, we consider the
1006    // conflict resolved.
1007    if zip(contents.iter(), simplified_file_ids.iter())
1008        .any(|(content, file_id)| file_id.is_none() && !content.is_empty())
1009    {
1010        let file_id = store.write_file(path, &mut &content[..]).await?;
1011        return Ok(Merge::normal(file_id));
1012    }
1013
1014    // Now write the new files contents we found by parsing the file with conflict
1015    // markers.
1016    // TODO: Write these concurrently
1017    let new_file_ids: Vec<Option<FileId>> = zip(contents.iter(), simplified_file_ids.iter())
1018        .map(|(content, file_id)| -> BackendResult<Option<FileId>> {
1019            match file_id {
1020                Some(_) => {
1021                    let file_id = store.write_file(path, &mut content.as_slice()).block_on()?;
1022                    Ok(Some(file_id))
1023                }
1024                None => {
1025                    // The missing side of a conflict is still represented by
1026                    // the empty string we materialized it as
1027                    Ok(None)
1028                }
1029            }
1030        })
1031        .try_collect()?;
1032
1033    // If the conflict was simplified, expand the conflict to the original
1034    // number of sides.
1035    let new_file_ids = if new_file_ids.len() != file_ids.iter().len() {
1036        file_ids
1037            .clone()
1038            .update_from_simplified(Merge::from_vec(new_file_ids))
1039    } else {
1040        Merge::from_vec(new_file_ids)
1041    };
1042    Ok(new_file_ids)
1043}
1044
1045#[cfg(test)]
1046mod tests {
1047    use super::*;
1048
1049    #[test]
1050    fn test_resolve_file_executable() {
1051        fn resolve<const N: usize>(values: [Option<bool>; N]) -> Option<bool> {
1052            resolve_file_executable(&Merge::from_vec(values.to_vec()))
1053        }
1054
1055        // already resolved
1056        assert_eq!(resolve([None]), None);
1057        assert_eq!(resolve([Some(false)]), Some(false));
1058        assert_eq!(resolve([Some(true)]), Some(true));
1059
1060        // trivially resolved
1061        assert_eq!(resolve([Some(true), Some(true), Some(true)]), Some(true));
1062        assert_eq!(resolve([Some(true), Some(false), Some(false)]), Some(true));
1063        assert_eq!(resolve([Some(false), Some(true), Some(false)]), Some(false));
1064        assert_eq!(resolve([None, None, Some(true)]), Some(true));
1065
1066        // unresolvable
1067        assert_eq!(resolve([Some(false), Some(true), None]), None);
1068
1069        // trivially resolved to absent, so pick the original state
1070        assert_eq!(resolve([Some(true), Some(true), None]), Some(true));
1071        assert_eq!(resolve([None, Some(false), Some(false)]), Some(false));
1072        assert_eq!(
1073            resolve([None, None, Some(true), Some(true), None]),
1074            Some(true)
1075        );
1076
1077        // trivially resolved to absent, and the original state is ambiguous
1078        assert_eq!(
1079            resolve([Some(true), Some(true), None, Some(false), Some(false)]),
1080            None
1081        );
1082        assert_eq!(
1083            resolve([
1084                None,
1085                Some(true),
1086                Some(true),
1087                Some(false),
1088                Some(false),
1089                Some(false),
1090                Some(false),
1091            ]),
1092            None
1093        );
1094    }
1095}