jj_lib/
conflicts.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![allow(missing_docs)]
16
17use std::io;
18use std::io::Write;
19use std::iter::zip;
20use std::pin::Pin;
21
22use bstr::BString;
23use bstr::ByteSlice as _;
24use futures::stream::BoxStream;
25use futures::try_join;
26use futures::Stream;
27use futures::StreamExt as _;
28use itertools::Itertools as _;
29use pollster::FutureExt as _;
30use tokio::io::AsyncRead;
31use tokio::io::AsyncReadExt as _;
32
33use crate::backend::BackendError;
34use crate::backend::BackendResult;
35use crate::backend::CommitId;
36use crate::backend::CopyId;
37use crate::backend::FileId;
38use crate::backend::SymlinkId;
39use crate::backend::TreeId;
40use crate::backend::TreeValue;
41use crate::copies::CopiesTreeDiffEntry;
42use crate::copies::CopiesTreeDiffEntryPath;
43use crate::diff::Diff;
44use crate::diff::DiffHunk;
45use crate::diff::DiffHunkKind;
46use crate::files;
47use crate::files::MergeResult;
48use crate::merge::Merge;
49use crate::merge::MergedTreeValue;
50use crate::repo_path::RepoPath;
51use crate::store::Store;
52
53/// Minimum length of conflict markers.
54pub const MIN_CONFLICT_MARKER_LEN: usize = 7;
55
56/// If a file already contains lines which look like conflict markers of length
57/// N, then the conflict markers we add will be of length (N + increment). This
58/// number is chosen to make the conflict markers noticeably longer than the
59/// existing markers.
60const CONFLICT_MARKER_LEN_INCREMENT: usize = 4;
61
62/// Comment for missing terminating newline in a term of a conflict.
63const NO_EOL_COMMENT: &str = " (no terminating newline)";
64
65/// Comment for missing terminating newline in the "add" side of a diff.
66const ADD_NO_EOL_COMMENT: &str = " (removes terminating newline)";
67
68/// Comment for missing terminating newline in the "remove" side of a diff.
69const REMOVE_NO_EOL_COMMENT: &str = " (adds terminating newline)";
70
71fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> {
72    for hunk in hunks {
73        match hunk.kind {
74            DiffHunkKind::Matching => {
75                debug_assert!(hunk.contents.iter().all_equal());
76                for line in hunk.contents[0].lines_with_terminator() {
77                    file.write_all(b" ")?;
78                    write_and_ensure_newline(file, line)?;
79                }
80            }
81            DiffHunkKind::Different => {
82                for line in hunk.contents[0].lines_with_terminator() {
83                    file.write_all(b"-")?;
84                    write_and_ensure_newline(file, line)?;
85                }
86                for line in hunk.contents[1].lines_with_terminator() {
87                    file.write_all(b"+")?;
88                    write_and_ensure_newline(file, line)?;
89                }
90            }
91        }
92    }
93    Ok(())
94}
95
96async fn get_file_contents(
97    store: &Store,
98    path: &RepoPath,
99    term: &Option<FileId>,
100) -> BackendResult<BString> {
101    match term {
102        Some(id) => {
103            let mut reader = store.read_file(path, id).await?;
104            let mut content = vec![];
105            reader
106                .read_to_end(&mut content)
107                .await
108                .map_err(|err| BackendError::ReadFile {
109                    path: path.to_owned(),
110                    id: id.clone(),
111                    source: err.into(),
112                })?;
113            Ok(BString::new(content))
114        }
115        // If the conflict had removed the file on one side, we pretend that the file
116        // was empty there.
117        None => Ok(BString::new(vec![])),
118    }
119}
120
121pub async fn extract_as_single_hunk(
122    merge: &Merge<Option<FileId>>,
123    store: &Store,
124    path: &RepoPath,
125) -> BackendResult<Merge<BString>> {
126    merge
127        .try_map_async(|term| get_file_contents(store, path, term))
128        .await
129}
130
131/// A type similar to `MergedTreeValue` but with associated data to include in
132/// e.g. the working copy or in a diff.
133pub enum MaterializedTreeValue {
134    Absent,
135    AccessDenied(Box<dyn std::error::Error + Send + Sync>),
136    File(MaterializedFileValue),
137    Symlink { id: SymlinkId, target: String },
138    FileConflict(MaterializedFileConflictValue),
139    OtherConflict { id: MergedTreeValue },
140    GitSubmodule(CommitId),
141    Tree(TreeId),
142}
143
144impl MaterializedTreeValue {
145    pub fn is_absent(&self) -> bool {
146        matches!(self, MaterializedTreeValue::Absent)
147    }
148
149    pub fn is_present(&self) -> bool {
150        !self.is_absent()
151    }
152}
153
154/// [`TreeValue::File`] with file content `reader`.
155pub struct MaterializedFileValue {
156    pub id: FileId,
157    pub executable: bool,
158    pub copy_id: CopyId,
159    pub reader: Pin<Box<dyn AsyncRead>>,
160}
161
162impl MaterializedFileValue {
163    /// Reads file content until EOF. The provided `path` is used only for error
164    /// reporting purpose.
165    pub async fn read_all(&mut self, path: &RepoPath) -> BackendResult<Vec<u8>> {
166        let mut buf = Vec::new();
167        self.reader
168            .read_to_end(&mut buf)
169            .await
170            .map_err(|err| BackendError::ReadFile {
171                path: path.to_owned(),
172                id: self.id.clone(),
173                source: err.into(),
174            })?;
175        Ok(buf)
176    }
177}
178
179/// Conflicted [`TreeValue::File`]s with file contents.
180pub struct MaterializedFileConflictValue {
181    /// File ids which preserve the shape of the tree conflict, to be used with
182    /// [`Merge::update_from_simplified()`].
183    pub unsimplified_ids: Merge<Option<FileId>>,
184    /// Simplified file ids, in which redundant id pairs are dropped.
185    pub ids: Merge<Option<FileId>>,
186    /// File contents corresponding to the simplified `ids`.
187    // TODO: or Vec<(FileId, Box<dyn Read>)> so that caller can stop reading
188    // when null bytes found?
189    pub contents: Merge<BString>,
190    /// Merged executable bit. `None` if there are changes in both executable
191    /// bit and file absence.
192    pub executable: Option<bool>,
193    /// Merged copy id. `None` if no single value could be determined.
194    pub copy_id: Option<CopyId>,
195}
196
197/// Reads the data associated with a `MergedTreeValue` so it can be written to
198/// e.g. the working copy or diff.
199pub async fn materialize_tree_value(
200    store: &Store,
201    path: &RepoPath,
202    value: MergedTreeValue,
203) -> BackendResult<MaterializedTreeValue> {
204    match materialize_tree_value_no_access_denied(store, path, value).await {
205        Err(BackendError::ReadAccessDenied { source, .. }) => {
206            Ok(MaterializedTreeValue::AccessDenied(source))
207        }
208        result => result,
209    }
210}
211
212async fn materialize_tree_value_no_access_denied(
213    store: &Store,
214    path: &RepoPath,
215    value: MergedTreeValue,
216) -> BackendResult<MaterializedTreeValue> {
217    match value.into_resolved() {
218        Ok(None) => Ok(MaterializedTreeValue::Absent),
219        Ok(Some(TreeValue::File {
220            id,
221            executable,
222            copy_id,
223        })) => {
224            let reader = store.read_file(path, &id).await?;
225            Ok(MaterializedTreeValue::File(MaterializedFileValue {
226                id,
227                executable,
228                copy_id,
229                reader,
230            }))
231        }
232        Ok(Some(TreeValue::Symlink(id))) => {
233            let target = store.read_symlink(path, &id).await?;
234            Ok(MaterializedTreeValue::Symlink { id, target })
235        }
236        Ok(Some(TreeValue::GitSubmodule(id))) => Ok(MaterializedTreeValue::GitSubmodule(id)),
237        Ok(Some(TreeValue::Tree(id))) => Ok(MaterializedTreeValue::Tree(id)),
238        Ok(Some(TreeValue::Conflict(_))) => {
239            panic!("cannot materialize legacy conflict object at path {path:?}");
240        }
241        Err(conflict) => match try_materialize_file_conflict_value(store, path, &conflict).await? {
242            Some(file) => Ok(MaterializedTreeValue::FileConflict(file)),
243            None => Ok(MaterializedTreeValue::OtherConflict { id: conflict }),
244        },
245    }
246}
247
248/// Suppose `conflict` contains only files or absent entries, reads the file
249/// contents.
250pub async fn try_materialize_file_conflict_value(
251    store: &Store,
252    path: &RepoPath,
253    conflict: &MergedTreeValue,
254) -> BackendResult<Option<MaterializedFileConflictValue>> {
255    let (Some(unsimplified_ids), Some(executable_bits)) =
256        (conflict.to_file_merge(), conflict.to_executable_merge())
257    else {
258        return Ok(None);
259    };
260    let ids = unsimplified_ids.simplify();
261    let contents = extract_as_single_hunk(&ids, store, path).await?;
262    let executable = resolve_file_executable(&executable_bits);
263    Ok(Some(MaterializedFileConflictValue {
264        unsimplified_ids,
265        ids,
266        contents,
267        executable,
268        copy_id: Some(CopyId::placeholder()),
269    }))
270}
271
272/// Resolves conflicts in file executable bit, returns the original state if the
273/// file is deleted and executable bit is unchanged.
274pub fn resolve_file_executable(merge: &Merge<Option<bool>>) -> Option<bool> {
275    let resolved = merge.resolve_trivial().copied()?;
276    if resolved.is_some() {
277        resolved
278    } else {
279        // If the merge is resolved to None (absent), there should be the same
280        // number of Some(true) and Some(false). Pick the old state if
281        // unambiguous, so the new file inherits the original executable bit.
282        merge.removes().flatten().copied().all_equal_value().ok()
283    }
284}
285
286/// Describes what style should be used when materializing conflicts.
287#[derive(Clone, Copy, PartialEq, Eq, Debug, Default, serde::Deserialize)]
288#[serde(rename_all = "kebab-case")]
289pub enum ConflictMarkerStyle {
290    /// Style which shows a snapshot and a series of diffs to apply.
291    #[default]
292    Diff,
293    /// Style which shows a snapshot for each base and side.
294    Snapshot,
295    /// Style which replicates Git's "diff3" style to support external tools.
296    Git,
297}
298
299/// Characters which can be repeated to form a conflict marker line when
300/// materializing and parsing conflicts.
301#[derive(Clone, Copy, PartialEq, Eq)]
302#[repr(u8)]
303enum ConflictMarkerLineChar {
304    ConflictStart = b'<',
305    ConflictEnd = b'>',
306    Add = b'+',
307    Remove = b'-',
308    Diff = b'%',
309    GitAncestor = b'|',
310    GitSeparator = b'=',
311}
312
313impl ConflictMarkerLineChar {
314    /// Get the ASCII byte used for this conflict marker.
315    fn to_byte(self) -> u8 {
316        self as u8
317    }
318
319    /// Parse a byte to see if it corresponds with any kind of conflict marker.
320    fn parse_byte(byte: u8) -> Option<Self> {
321        match byte {
322            b'<' => Some(Self::ConflictStart),
323            b'>' => Some(Self::ConflictEnd),
324            b'+' => Some(Self::Add),
325            b'-' => Some(Self::Remove),
326            b'%' => Some(Self::Diff),
327            b'|' => Some(Self::GitAncestor),
328            b'=' => Some(Self::GitSeparator),
329            _ => None,
330        }
331    }
332}
333
334/// Represents a conflict marker line parsed from the file. Conflict marker
335/// lines consist of a single ASCII character repeated for a certain length.
336struct ConflictMarkerLine {
337    kind: ConflictMarkerLineChar,
338    len: usize,
339}
340
341/// Write a conflict marker to an output file.
342fn write_conflict_marker(
343    output: &mut dyn Write,
344    kind: ConflictMarkerLineChar,
345    len: usize,
346    suffix_text: &str,
347) -> io::Result<()> {
348    let conflict_marker = BString::new(vec![kind.to_byte(); len]);
349
350    if suffix_text.is_empty() {
351        writeln!(output, "{conflict_marker}")
352    } else {
353        writeln!(output, "{conflict_marker} {suffix_text}")
354    }
355}
356
357/// Parse a conflict marker from a line of a file. The conflict marker may have
358/// any length (even less than MIN_CONFLICT_MARKER_LEN).
359fn parse_conflict_marker_any_len(line: &[u8]) -> Option<ConflictMarkerLine> {
360    let first_byte = *line.first()?;
361    let kind = ConflictMarkerLineChar::parse_byte(first_byte)?;
362    let len = line.iter().take_while(|&&b| b == first_byte).count();
363
364    if let Some(next_byte) = line.get(len) {
365        // If there is a character after the marker, it must be ASCII whitespace
366        if !next_byte.is_ascii_whitespace() {
367            return None;
368        }
369    }
370
371    Some(ConflictMarkerLine { kind, len })
372}
373
374/// Parse a conflict marker, expecting it to be at least a certain length. Any
375/// shorter conflict markers are ignored.
376fn parse_conflict_marker(line: &[u8], expected_len: usize) -> Option<ConflictMarkerLineChar> {
377    parse_conflict_marker_any_len(line)
378        .filter(|marker| marker.len >= expected_len)
379        .map(|marker| marker.kind)
380}
381
382/// Given a Merge of files, choose the conflict marker length to use when
383/// materializing conflicts.
384pub fn choose_materialized_conflict_marker_len<T: AsRef<[u8]>>(single_hunk: &Merge<T>) -> usize {
385    let max_existing_marker_len = single_hunk
386        .iter()
387        .flat_map(|file| file.as_ref().lines_with_terminator())
388        .filter_map(parse_conflict_marker_any_len)
389        .map(|marker| marker.len)
390        .max()
391        .unwrap_or_default();
392
393    max_existing_marker_len
394        .saturating_add(CONFLICT_MARKER_LEN_INCREMENT)
395        .max(MIN_CONFLICT_MARKER_LEN)
396}
397
398pub fn materialize_merge_result<T: AsRef<[u8]>>(
399    single_hunk: &Merge<T>,
400    conflict_marker_style: ConflictMarkerStyle,
401    output: &mut dyn Write,
402) -> io::Result<()> {
403    let merge_result = files::merge_hunks(single_hunk);
404    match &merge_result {
405        MergeResult::Resolved(content) => output.write_all(content),
406        MergeResult::Conflict(hunks) => {
407            let conflict_marker_len = choose_materialized_conflict_marker_len(single_hunk);
408            materialize_conflict_hunks(hunks, conflict_marker_style, conflict_marker_len, output)
409        }
410    }
411}
412
413pub fn materialize_merge_result_with_marker_len<T: AsRef<[u8]>>(
414    single_hunk: &Merge<T>,
415    conflict_marker_style: ConflictMarkerStyle,
416    conflict_marker_len: usize,
417    output: &mut dyn Write,
418) -> io::Result<()> {
419    let merge_result = files::merge_hunks(single_hunk);
420    match &merge_result {
421        MergeResult::Resolved(content) => output.write_all(content),
422        MergeResult::Conflict(hunks) => {
423            materialize_conflict_hunks(hunks, conflict_marker_style, conflict_marker_len, output)
424        }
425    }
426}
427
428pub fn materialize_merge_result_to_bytes<T: AsRef<[u8]>>(
429    single_hunk: &Merge<T>,
430    conflict_marker_style: ConflictMarkerStyle,
431) -> BString {
432    let merge_result = files::merge_hunks(single_hunk);
433    match merge_result {
434        MergeResult::Resolved(content) => content,
435        MergeResult::Conflict(hunks) => {
436            let conflict_marker_len = choose_materialized_conflict_marker_len(single_hunk);
437            let mut output = Vec::new();
438            materialize_conflict_hunks(
439                &hunks,
440                conflict_marker_style,
441                conflict_marker_len,
442                &mut output,
443            )
444            .expect("writing to an in-memory buffer should never fail");
445            output.into()
446        }
447    }
448}
449
450pub fn materialize_merge_result_to_bytes_with_marker_len<T: AsRef<[u8]>>(
451    single_hunk: &Merge<T>,
452    conflict_marker_style: ConflictMarkerStyle,
453    conflict_marker_len: usize,
454) -> BString {
455    let merge_result = files::merge_hunks(single_hunk);
456    match merge_result {
457        MergeResult::Resolved(content) => content,
458        MergeResult::Conflict(hunks) => {
459            let mut output = Vec::new();
460            materialize_conflict_hunks(
461                &hunks,
462                conflict_marker_style,
463                conflict_marker_len,
464                &mut output,
465            )
466            .expect("writing to an in-memory buffer should never fail");
467            output.into()
468        }
469    }
470}
471
472fn materialize_conflict_hunks(
473    hunks: &[Merge<BString>],
474    conflict_marker_style: ConflictMarkerStyle,
475    conflict_marker_len: usize,
476    output: &mut dyn Write,
477) -> io::Result<()> {
478    let num_conflicts = hunks
479        .iter()
480        .filter(|hunk| hunk.as_resolved().is_none())
481        .count();
482    let mut conflict_index = 0;
483    for hunk in hunks {
484        if let Some(content) = hunk.as_resolved() {
485            output.write_all(content)?;
486        } else {
487            conflict_index += 1;
488            let conflict_info = format!("Conflict {conflict_index} of {num_conflicts}");
489
490            match (conflict_marker_style, hunk.as_slice()) {
491                // 2-sided conflicts can use Git-style conflict markers
492                (ConflictMarkerStyle::Git, [left, base, right]) => {
493                    materialize_git_style_conflict(
494                        left,
495                        base,
496                        right,
497                        &conflict_info,
498                        conflict_marker_len,
499                        output,
500                    )?;
501                }
502                _ => {
503                    materialize_jj_style_conflict(
504                        hunk,
505                        &conflict_info,
506                        conflict_marker_style,
507                        conflict_marker_len,
508                        output,
509                    )?;
510                }
511            }
512        }
513    }
514    Ok(())
515}
516
517fn materialize_git_style_conflict(
518    left: &[u8],
519    base: &[u8],
520    right: &[u8],
521    conflict_info: &str,
522    conflict_marker_len: usize,
523    output: &mut dyn Write,
524) -> io::Result<()> {
525    write_conflict_marker(
526        output,
527        ConflictMarkerLineChar::ConflictStart,
528        conflict_marker_len,
529        &format!("Side #1 ({conflict_info})"),
530    )?;
531    write_and_ensure_newline(output, left)?;
532
533    write_conflict_marker(
534        output,
535        ConflictMarkerLineChar::GitAncestor,
536        conflict_marker_len,
537        "Base",
538    )?;
539    write_and_ensure_newline(output, base)?;
540
541    // VS Code doesn't seem to support any trailing text on the separator line
542    write_conflict_marker(
543        output,
544        ConflictMarkerLineChar::GitSeparator,
545        conflict_marker_len,
546        "",
547    )?;
548
549    write_and_ensure_newline(output, right)?;
550    write_conflict_marker(
551        output,
552        ConflictMarkerLineChar::ConflictEnd,
553        conflict_marker_len,
554        &format!("Side #2 ({conflict_info} ends)"),
555    )?;
556
557    Ok(())
558}
559
560fn materialize_jj_style_conflict(
561    hunk: &Merge<BString>,
562    conflict_info: &str,
563    conflict_marker_style: ConflictMarkerStyle,
564    conflict_marker_len: usize,
565    output: &mut dyn Write,
566) -> io::Result<()> {
567    // Write a positive snapshot (side) of a conflict
568    let write_side = |add_index: usize, data: &[u8], output: &mut dyn Write| {
569        write_conflict_marker(
570            output,
571            ConflictMarkerLineChar::Add,
572            conflict_marker_len,
573            &format!(
574                "Contents of side #{}{}",
575                add_index + 1,
576                maybe_no_eol_comment(data)
577            ),
578        )?;
579        write_and_ensure_newline(output, data)
580    };
581
582    // Write a negative snapshot (base) of a conflict
583    let write_base = |base_str: &str, data: &[u8], output: &mut dyn Write| {
584        write_conflict_marker(
585            output,
586            ConflictMarkerLineChar::Remove,
587            conflict_marker_len,
588            &format!("Contents of {base_str}{}", maybe_no_eol_comment(data)),
589        )?;
590        write_and_ensure_newline(output, data)
591    };
592
593    // Write a diff from a negative term to a positive term
594    let write_diff =
595        |base_str: &str, add_index: usize, diff: &[DiffHunk], output: &mut dyn Write| {
596            let no_eol_remove = diff
597                .last()
598                .is_some_and(|diff_hunk| has_no_eol(diff_hunk.contents[0]));
599            let no_eol_add = diff
600                .last()
601                .is_some_and(|diff_hunk| has_no_eol(diff_hunk.contents[1]));
602            let no_eol_comment = match (no_eol_remove, no_eol_add) {
603                (true, true) => NO_EOL_COMMENT,
604                (true, _) => REMOVE_NO_EOL_COMMENT,
605                (_, true) => ADD_NO_EOL_COMMENT,
606                _ => "",
607            };
608            write_conflict_marker(
609                output,
610                ConflictMarkerLineChar::Diff,
611                conflict_marker_len,
612                &format!(
613                    "Changes from {base_str} to side #{}{no_eol_comment}",
614                    add_index + 1
615                ),
616            )?;
617            write_diff_hunks(diff, output)
618        };
619
620    write_conflict_marker(
621        output,
622        ConflictMarkerLineChar::ConflictStart,
623        conflict_marker_len,
624        conflict_info,
625    )?;
626    let mut add_index = 0;
627    for (base_index, left) in hunk.removes().enumerate() {
628        // The vast majority of conflicts one actually tries to resolve manually have 1
629        // base.
630        let base_str = if hunk.removes().len() == 1 {
631            "base".to_string()
632        } else {
633            format!("base #{}", base_index + 1)
634        };
635
636        let Some(right1) = hunk.get_add(add_index) else {
637            // If we have no more positive terms, emit the remaining negative terms as
638            // snapshots.
639            write_base(&base_str, left, output)?;
640            continue;
641        };
642
643        // For any style other than "diff", always emit sides and bases separately
644        if conflict_marker_style != ConflictMarkerStyle::Diff {
645            write_side(add_index, right1, output)?;
646            write_base(&base_str, left, output)?;
647            add_index += 1;
648            continue;
649        }
650
651        let diff1 = Diff::by_line([&left, &right1]).hunks().collect_vec();
652        // Check if the diff against the next positive term is better. Since we want to
653        // preserve the order of the terms, we don't match against any later positive
654        // terms.
655        if let Some(right2) = hunk.get_add(add_index + 1) {
656            let diff2 = Diff::by_line([&left, &right2]).hunks().collect_vec();
657            if diff_size(&diff2) < diff_size(&diff1) {
658                // If the next positive term is a better match, emit the current positive term
659                // as a snapshot and the next positive term as a diff.
660                write_side(add_index, right1, output)?;
661                write_diff(&base_str, add_index + 1, &diff2, output)?;
662                add_index += 2;
663                continue;
664            }
665        }
666
667        write_diff(&base_str, add_index, &diff1, output)?;
668        add_index += 1;
669    }
670
671    // Emit the remaining positive terms as snapshots.
672    for (add_index, slice) in hunk.adds().enumerate().skip(add_index) {
673        write_side(add_index, slice, output)?;
674    }
675    write_conflict_marker(
676        output,
677        ConflictMarkerLineChar::ConflictEnd,
678        conflict_marker_len,
679        &format!("{conflict_info} ends"),
680    )?;
681    Ok(())
682}
683
684fn maybe_no_eol_comment(slice: &[u8]) -> &'static str {
685    if has_no_eol(slice) {
686        NO_EOL_COMMENT
687    } else {
688        ""
689    }
690}
691
692// Write a chunk of data, ensuring that it doesn't end with a line which is
693// missing its terminating newline.
694fn write_and_ensure_newline(output: &mut dyn Write, data: &[u8]) -> io::Result<()> {
695    output.write_all(data)?;
696    if has_no_eol(data) {
697        writeln!(output)?;
698    }
699    Ok(())
700}
701
702// Check whether a slice is missing its terminating newline character.
703fn has_no_eol(slice: &[u8]) -> bool {
704    slice.last().is_some_and(|&last| last != b'\n')
705}
706
707fn diff_size(hunks: &[DiffHunk]) -> usize {
708    hunks
709        .iter()
710        .map(|hunk| match hunk.kind {
711            DiffHunkKind::Matching => 0,
712            DiffHunkKind::Different => hunk.contents.iter().map(|content| content.len()).sum(),
713        })
714        .sum()
715}
716
717pub struct MaterializedTreeDiffEntry {
718    pub path: CopiesTreeDiffEntryPath,
719    pub values: BackendResult<(MaterializedTreeValue, MaterializedTreeValue)>,
720}
721
722pub fn materialized_diff_stream<'a>(
723    store: &'a Store,
724    tree_diff: BoxStream<'a, CopiesTreeDiffEntry>,
725) -> impl Stream<Item = MaterializedTreeDiffEntry> + use<'a> {
726    tree_diff
727        .map(|CopiesTreeDiffEntry { path, values }| async {
728            match values {
729                Err(err) => MaterializedTreeDiffEntry {
730                    path,
731                    values: Err(err),
732                },
733                Ok((before, after)) => {
734                    let before_future = materialize_tree_value(store, path.source(), before);
735                    let after_future = materialize_tree_value(store, path.target(), after);
736                    let values = try_join!(before_future, after_future);
737                    MaterializedTreeDiffEntry { path, values }
738                }
739            }
740        })
741        .buffered((store.concurrency() / 2).max(1))
742}
743
744/// Parses conflict markers from a slice.
745///
746/// Returns `None` if there were no valid conflict markers. The caller
747/// has to provide the expected number of merge sides (adds). Conflict
748/// markers that are otherwise valid will be considered invalid if
749/// they don't have the expected arity.
750///
751/// All conflict markers in the file must be at least as long as the expected
752/// length. Any shorter conflict markers will be ignored.
753// TODO: "parse" is not usually the opposite of "materialize", so maybe we
754// should rename them to "serialize" and "deserialize"?
755pub fn parse_conflict(
756    input: &[u8],
757    num_sides: usize,
758    expected_marker_len: usize,
759) -> Option<Vec<Merge<BString>>> {
760    if input.is_empty() {
761        return None;
762    }
763    let mut hunks = vec![];
764    let mut pos = 0;
765    let mut resolved_start = 0;
766    let mut conflict_start = None;
767    let mut conflict_start_len = 0;
768    for line in input.lines_with_terminator() {
769        match parse_conflict_marker(line, expected_marker_len) {
770            Some(ConflictMarkerLineChar::ConflictStart) => {
771                conflict_start = Some(pos);
772                conflict_start_len = line.len();
773            }
774            Some(ConflictMarkerLineChar::ConflictEnd) => {
775                if let Some(conflict_start_index) = conflict_start.take() {
776                    let conflict_body = &input[conflict_start_index + conflict_start_len..pos];
777                    let hunk = parse_conflict_hunk(conflict_body, expected_marker_len);
778                    if hunk.num_sides() == num_sides {
779                        let resolved_slice = &input[resolved_start..conflict_start_index];
780                        if !resolved_slice.is_empty() {
781                            hunks.push(Merge::resolved(BString::from(resolved_slice)));
782                        }
783                        hunks.push(hunk);
784                        resolved_start = pos + line.len();
785                    }
786                }
787            }
788            _ => {}
789        }
790        pos += line.len();
791    }
792
793    if hunks.is_empty() {
794        None
795    } else {
796        if resolved_start < input.len() {
797            hunks.push(Merge::resolved(BString::from(&input[resolved_start..])));
798        }
799        Some(hunks)
800    }
801}
802
803/// This method handles parsing both JJ-style and Git-style conflict markers,
804/// meaning that switching conflict marker styles won't prevent existing files
805/// with other conflict marker styles from being parsed successfully. The
806/// conflict marker style to use for parsing is determined based on the first
807/// line of the hunk.
808fn parse_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
809    // If the hunk starts with a conflict marker, find its first character
810    let initial_conflict_marker = input
811        .lines_with_terminator()
812        .next()
813        .and_then(|line| parse_conflict_marker(line, expected_marker_len));
814
815    match initial_conflict_marker {
816        // JJ-style conflicts must start with one of these 3 conflict marker lines
817        Some(
818            ConflictMarkerLineChar::Diff
819            | ConflictMarkerLineChar::Remove
820            | ConflictMarkerLineChar::Add,
821        ) => parse_jj_style_conflict_hunk(input, expected_marker_len),
822        // Git-style conflicts either must not start with a conflict marker line, or must start with
823        // the "|||||||" conflict marker line (if the first side was empty)
824        None | Some(ConflictMarkerLineChar::GitAncestor) => {
825            parse_git_style_conflict_hunk(input, expected_marker_len)
826        }
827        // No other conflict markers are allowed at the start of a hunk
828        Some(_) => Merge::resolved(BString::new(vec![])),
829    }
830}
831
832fn parse_jj_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
833    enum State {
834        Diff,
835        Remove,
836        Add,
837        Unknown,
838    }
839    let mut state = State::Unknown;
840    let mut removes = vec![];
841    let mut adds = vec![];
842    for line in input.lines_with_terminator() {
843        match parse_conflict_marker(line, expected_marker_len) {
844            Some(ConflictMarkerLineChar::Diff) => {
845                state = State::Diff;
846                removes.push(BString::new(vec![]));
847                adds.push(BString::new(vec![]));
848                continue;
849            }
850            Some(ConflictMarkerLineChar::Remove) => {
851                state = State::Remove;
852                removes.push(BString::new(vec![]));
853                continue;
854            }
855            Some(ConflictMarkerLineChar::Add) => {
856                state = State::Add;
857                adds.push(BString::new(vec![]));
858                continue;
859            }
860            _ => {}
861        }
862        match state {
863            State::Diff => {
864                if let Some(rest) = line.strip_prefix(b"-") {
865                    removes.last_mut().unwrap().extend_from_slice(rest);
866                } else if let Some(rest) = line.strip_prefix(b"+") {
867                    adds.last_mut().unwrap().extend_from_slice(rest);
868                } else if let Some(rest) = line.strip_prefix(b" ") {
869                    removes.last_mut().unwrap().extend_from_slice(rest);
870                    adds.last_mut().unwrap().extend_from_slice(rest);
871                } else if line == b"\n" || line == b"\r\n" {
872                    // Some editors strip trailing whitespace, so " \n" might become "\n". It would
873                    // be unfortunate if this prevented the conflict from being parsed, so we add
874                    // the empty line to the "remove" and "add" as if there was a space in front
875                    removes.last_mut().unwrap().extend_from_slice(line);
876                    adds.last_mut().unwrap().extend_from_slice(line);
877                } else {
878                    // Doesn't look like a valid conflict
879                    return Merge::resolved(BString::new(vec![]));
880                }
881            }
882            State::Remove => {
883                removes.last_mut().unwrap().extend_from_slice(line);
884            }
885            State::Add => {
886                adds.last_mut().unwrap().extend_from_slice(line);
887            }
888            State::Unknown => {
889                // Doesn't look like a valid conflict
890                return Merge::resolved(BString::new(vec![]));
891            }
892        }
893    }
894
895    if adds.len() == removes.len() + 1 {
896        Merge::from_removes_adds(removes, adds)
897    } else {
898        // Doesn't look like a valid conflict
899        Merge::resolved(BString::new(vec![]))
900    }
901}
902
903fn parse_git_style_conflict_hunk(input: &[u8], expected_marker_len: usize) -> Merge<BString> {
904    #[derive(PartialEq, Eq)]
905    enum State {
906        Left,
907        Base,
908        Right,
909    }
910    let mut state = State::Left;
911    let mut left = BString::new(vec![]);
912    let mut base = BString::new(vec![]);
913    let mut right = BString::new(vec![]);
914    for line in input.lines_with_terminator() {
915        match parse_conflict_marker(line, expected_marker_len) {
916            Some(ConflictMarkerLineChar::GitAncestor) => {
917                if state == State::Left {
918                    state = State::Base;
919                    continue;
920                } else {
921                    // Base must come after left
922                    return Merge::resolved(BString::new(vec![]));
923                }
924            }
925            Some(ConflictMarkerLineChar::GitSeparator) => {
926                if state == State::Base {
927                    state = State::Right;
928                    continue;
929                } else {
930                    // Right must come after base
931                    return Merge::resolved(BString::new(vec![]));
932                }
933            }
934            _ => {}
935        }
936        match state {
937            State::Left => left.extend_from_slice(line),
938            State::Base => base.extend_from_slice(line),
939            State::Right => right.extend_from_slice(line),
940        }
941    }
942
943    if state == State::Right {
944        Merge::from_vec(vec![left, base, right])
945    } else {
946        // Doesn't look like a valid conflict
947        Merge::resolved(BString::new(vec![]))
948    }
949}
950
951/// Parses conflict markers in `content` and returns an updated version of
952/// `file_ids` with the new contents. If no (valid) conflict markers remain, a
953/// single resolves `FileId` will be returned.
954pub async fn update_from_content(
955    file_ids: &Merge<Option<FileId>>,
956    store: &Store,
957    path: &RepoPath,
958    content: &[u8],
959    conflict_marker_style: ConflictMarkerStyle,
960    conflict_marker_len: usize,
961) -> BackendResult<Merge<Option<FileId>>> {
962    let simplified_file_ids = file_ids.simplify();
963
964    // First check if the new content is unchanged compared to the old content. If
965    // it is, we don't need parse the content or write any new objects to the
966    // store. This is also a way of making sure that unchanged tree/file
967    // conflicts (for example) are not converted to regular files in the working
968    // copy.
969    let mut old_content = Vec::with_capacity(content.len());
970    let merge_hunk = extract_as_single_hunk(&simplified_file_ids, store, path).await?;
971    materialize_merge_result_with_marker_len(
972        &merge_hunk,
973        conflict_marker_style,
974        conflict_marker_len,
975        &mut old_content,
976    )
977    .unwrap();
978    if content == old_content {
979        return Ok(file_ids.clone());
980    }
981
982    // Parse conflicts from the new content using the arity of the simplified
983    // conflicts.
984    let Some(mut hunks) = parse_conflict(
985        content,
986        simplified_file_ids.num_sides(),
987        conflict_marker_len,
988    ) else {
989        // Either there are no markers or they don't have the expected arity
990        let file_id = store.write_file(path, &mut &content[..]).await?;
991        return Ok(Merge::normal(file_id));
992    };
993
994    // If there is a conflict at the end of the file and a term ends with a newline,
995    // check whether the original term ended with a newline. If it didn't, then
996    // remove the newline since it was added automatically when materializing.
997    if let Some(last_hunk) = hunks.last_mut().filter(|hunk| !hunk.is_resolved()) {
998        for (original_content, term) in merge_hunk.iter().zip_eq(last_hunk.iter_mut()) {
999            if term.last() == Some(&b'\n') && has_no_eol(original_content) {
1000                term.pop();
1001            }
1002        }
1003    }
1004
1005    let mut contents = simplified_file_ids.map(|_| vec![]);
1006    for hunk in hunks {
1007        if let Some(slice) = hunk.as_resolved() {
1008            for content in contents.iter_mut() {
1009                content.extend_from_slice(slice);
1010            }
1011        } else {
1012            for (content, slice) in zip(contents.iter_mut(), hunk.into_iter()) {
1013                content.extend(Vec::from(slice));
1014            }
1015        }
1016    }
1017
1018    // If the user edited the empty placeholder for an absent side, we consider the
1019    // conflict resolved.
1020    if zip(contents.iter(), simplified_file_ids.iter())
1021        .any(|(content, file_id)| file_id.is_none() && !content.is_empty())
1022    {
1023        let file_id = store.write_file(path, &mut &content[..]).await?;
1024        return Ok(Merge::normal(file_id));
1025    }
1026
1027    // Now write the new files contents we found by parsing the file with conflict
1028    // markers.
1029    // TODO: Write these concurrently
1030    let new_file_ids: Vec<Option<FileId>> = zip(contents.iter(), simplified_file_ids.iter())
1031        .map(|(content, file_id)| -> BackendResult<Option<FileId>> {
1032            match file_id {
1033                Some(_) => {
1034                    let file_id = store.write_file(path, &mut content.as_slice()).block_on()?;
1035                    Ok(Some(file_id))
1036                }
1037                None => {
1038                    // The missing side of a conflict is still represented by
1039                    // the empty string we materialized it as
1040                    Ok(None)
1041                }
1042            }
1043        })
1044        .try_collect()?;
1045
1046    // If the conflict was simplified, expand the conflict to the original
1047    // number of sides.
1048    let new_file_ids = if new_file_ids.len() != file_ids.iter().len() {
1049        file_ids
1050            .clone()
1051            .update_from_simplified(Merge::from_vec(new_file_ids))
1052    } else {
1053        Merge::from_vec(new_file_ids)
1054    };
1055    Ok(new_file_ids)
1056}
1057
1058#[cfg(test)]
1059mod tests {
1060    use super::*;
1061
1062    #[test]
1063    fn test_resolve_file_executable() {
1064        fn resolve<const N: usize>(values: [Option<bool>; N]) -> Option<bool> {
1065            resolve_file_executable(&Merge::from_vec(values.to_vec()))
1066        }
1067
1068        // already resolved
1069        assert_eq!(resolve([None]), None);
1070        assert_eq!(resolve([Some(false)]), Some(false));
1071        assert_eq!(resolve([Some(true)]), Some(true));
1072
1073        // trivially resolved
1074        assert_eq!(resolve([Some(true), Some(true), Some(true)]), Some(true));
1075        assert_eq!(resolve([Some(true), Some(false), Some(false)]), Some(true));
1076        assert_eq!(resolve([Some(false), Some(true), Some(false)]), Some(false));
1077        assert_eq!(resolve([None, None, Some(true)]), Some(true));
1078
1079        // unresolvable
1080        assert_eq!(resolve([Some(false), Some(true), None]), None);
1081
1082        // trivially resolved to absent, so pick the original state
1083        assert_eq!(resolve([Some(true), Some(true), None]), Some(true));
1084        assert_eq!(resolve([None, Some(false), Some(false)]), Some(false));
1085        assert_eq!(
1086            resolve([None, None, Some(true), Some(true), None]),
1087            Some(true)
1088        );
1089
1090        // trivially resolved to absent, and the original state is ambiguous
1091        assert_eq!(
1092            resolve([Some(true), Some(true), None, Some(false), Some(false)]),
1093            None
1094        );
1095        assert_eq!(
1096            resolve([
1097                None,
1098                Some(true),
1099                Some(true),
1100                Some(false),
1101                Some(false),
1102                Some(false),
1103                Some(false),
1104            ]),
1105            None
1106        );
1107    }
1108}