jujutsu_lib/
conflicts.rs

1// Copyright 2020 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::io::{Cursor, Write};
16
17use itertools::Itertools;
18
19use crate::backend::{BackendResult, Conflict, ConflictId, ConflictPart, ObjectId, TreeValue};
20use crate::diff::{find_line_ranges, Diff, DiffHunk};
21use crate::files;
22use crate::files::{ConflictHunk, MergeHunk, MergeResult};
23use crate::repo_path::RepoPath;
24use crate::store::Store;
25
26const CONFLICT_START_LINE: &[u8] = b"<<<<<<<\n";
27const CONFLICT_END_LINE: &[u8] = b">>>>>>>\n";
28const CONFLICT_DIFF_LINE: &[u8] = b"%%%%%%%\n";
29const CONFLICT_MINUS_LINE: &[u8] = b"-------\n";
30const CONFLICT_PLUS_LINE: &[u8] = b"+++++++\n";
31
32fn describe_conflict_part(part: &ConflictPart) -> String {
33    match &part.value {
34        TreeValue::File {
35            id,
36            executable: false,
37        } => {
38            format!("file with id {}", id.hex())
39        }
40        TreeValue::File {
41            id,
42            executable: true,
43        } => {
44            format!("executable file with id {}", id.hex())
45        }
46        TreeValue::Symlink(id) => {
47            format!("symlink with id {}", id.hex())
48        }
49        TreeValue::Tree(id) => {
50            format!("tree with id {}", id.hex())
51        }
52        TreeValue::GitSubmodule(id) => {
53            format!("Git submodule with id {}", id.hex())
54        }
55        TreeValue::Conflict(id) => {
56            format!("Conflict with id {}", id.hex())
57        }
58    }
59}
60
61/// Give a summary description of a conflict's "adds" and "removes"
62pub fn describe_conflict(conflict: &Conflict, file: &mut dyn Write) -> std::io::Result<()> {
63    file.write_all(b"Conflict:\n")?;
64    for part in &conflict.removes {
65        file.write_all(format!("  Removing {}\n", describe_conflict_part(part)).as_bytes())?;
66    }
67    for part in &conflict.adds {
68        file.write_all(format!("  Adding {}\n", describe_conflict_part(part)).as_bytes())?;
69    }
70    Ok(())
71}
72
73fn file_parts(parts: &[ConflictPart]) -> Vec<&ConflictPart> {
74    parts
75        .iter()
76        .filter(|part| {
77            matches!(
78                part.value,
79                TreeValue::File {
80                    executable: false,
81                    ..
82                }
83            )
84        })
85        .collect_vec()
86}
87
88fn get_file_contents(store: &Store, path: &RepoPath, part: &ConflictPart) -> Vec<u8> {
89    if let TreeValue::File {
90        id,
91        executable: false,
92    } = &part.value
93    {
94        let mut content: Vec<u8> = vec![];
95        store
96            .read_file(path, id)
97            .unwrap()
98            .read_to_end(&mut content)
99            .unwrap();
100        content
101    } else {
102        panic!("unexpectedly got a non-file conflict part");
103    }
104}
105
106fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> std::io::Result<()> {
107    for hunk in hunks {
108        match hunk {
109            DiffHunk::Matching(content) => {
110                for line in content.split_inclusive(|b| *b == b'\n') {
111                    file.write_all(b" ")?;
112                    file.write_all(line)?;
113                }
114            }
115            DiffHunk::Different(content) => {
116                for line in content[0].split_inclusive(|b| *b == b'\n') {
117                    file.write_all(b"-")?;
118                    file.write_all(line)?;
119                }
120                for line in content[1].split_inclusive(|b| *b == b'\n') {
121                    file.write_all(b"+")?;
122                    file.write_all(line)?;
123                }
124            }
125        }
126    }
127    Ok(())
128}
129
130pub fn materialize_conflict(
131    store: &Store,
132    path: &RepoPath,
133    conflict: &Conflict,
134    output: &mut dyn Write,
135) -> std::io::Result<()> {
136    match extract_file_conflict_as_single_hunk(store, path, conflict) {
137        None => {
138            // Unless all parts are regular files, we can't do much better than to try to
139            // describe the conflict.
140            describe_conflict(conflict, output)
141        }
142        Some(content) => materialize_merge_result(&content, output),
143    }
144}
145
146/// Only works if all parts of the conflict are regular, non-executable files
147pub fn extract_file_conflict_as_single_hunk(
148    store: &Store,
149    path: &RepoPath,
150    conflict: &Conflict,
151) -> Option<ConflictHunk> {
152    let file_adds = file_parts(&conflict.adds);
153    let file_removes = file_parts(&conflict.removes);
154    if file_adds.len() != conflict.adds.len() || file_removes.len() != conflict.removes.len() {
155        return None;
156    }
157    let added_content = file_adds
158        .iter()
159        .map(|part| get_file_contents(store, path, part))
160        .collect_vec();
161    let removed_content = file_removes
162        .iter()
163        .map(|part| get_file_contents(store, path, part))
164        .collect_vec();
165
166    Some(ConflictHunk {
167        removes: removed_content,
168        adds: added_content,
169    })
170}
171
172pub fn materialize_merge_result(
173    single_hunk: &ConflictHunk,
174    output: &mut dyn Write,
175) -> std::io::Result<()> {
176    let removed_slices = single_hunk.removes.iter().map(Vec::as_slice).collect_vec();
177    let added_slices = single_hunk.adds.iter().map(Vec::as_slice).collect_vec();
178    let merge_result = files::merge(&removed_slices, &added_slices);
179    match merge_result {
180        MergeResult::Resolved(content) => {
181            output.write_all(&content)?;
182        }
183        MergeResult::Conflict(hunks) => {
184            for hunk in hunks {
185                match hunk {
186                    MergeHunk::Resolved(content) => {
187                        output.write_all(&content)?;
188                    }
189                    MergeHunk::Conflict(ConflictHunk {
190                        mut removes,
191                        mut adds,
192                    }) => {
193                        output.write_all(CONFLICT_START_LINE)?;
194                        while !removes.is_empty() && !adds.is_empty() {
195                            let left = &removes[0];
196                            let mut diffs = vec![];
197                            for right in &adds {
198                                diffs.push(
199                                    Diff::for_tokenizer(&[left, right], &find_line_ranges)
200                                        .hunks()
201                                        .collect_vec(),
202                                );
203                            }
204                            let min_diff_index = diffs
205                                .iter()
206                                .position_min_by_key(|diff| diff_size(diff))
207                                .unwrap();
208                            output.write_all(CONFLICT_DIFF_LINE)?;
209                            write_diff_hunks(&diffs[min_diff_index], output)?;
210                            removes.remove(0);
211                            adds.remove(min_diff_index);
212                        }
213
214                        for slice in removes {
215                            output.write_all(CONFLICT_MINUS_LINE)?;
216                            output.write_all(&slice)?;
217                        }
218                        for slice in adds {
219                            output.write_all(CONFLICT_PLUS_LINE)?;
220                            output.write_all(&slice)?;
221                        }
222                        output.write_all(CONFLICT_END_LINE)?;
223                    }
224                }
225            }
226        }
227    }
228    Ok(())
229}
230
231fn diff_size(hunks: &[DiffHunk]) -> usize {
232    hunks
233        .iter()
234        .map(|hunk| match hunk {
235            DiffHunk::Matching(_) => 0,
236            DiffHunk::Different(slices) => slices.iter().map(|slice| slice.len()).sum(),
237        })
238        .sum()
239}
240
241pub fn conflict_to_materialized_value(
242    store: &Store,
243    path: &RepoPath,
244    conflict: &Conflict,
245) -> TreeValue {
246    let mut buf = vec![];
247    materialize_conflict(store, path, conflict, &mut buf).unwrap();
248    let file_id = store.write_file(path, &mut Cursor::new(&buf)).unwrap();
249    TreeValue::File {
250        id: file_id,
251        executable: false,
252    }
253}
254
255/// Parses conflict markers from a slice. Returns None if there were no valid
256/// conflict markers. The caller has to provide the expected number of removed
257/// and added inputs to the conflicts. Conflict markers that are otherwise valid
258/// will be considered invalid if they don't have the expected arity.
259// TODO: "parse" is not usually the opposite of "materialize", so maybe we
260// should rename them to "serialize" and "deserialize"?
261pub fn parse_conflict(input: &[u8], num_removes: usize, num_adds: usize) -> Option<Vec<MergeHunk>> {
262    if input.is_empty() {
263        return None;
264    }
265    let mut hunks = vec![];
266    let mut pos = 0;
267    let mut resolved_start = 0;
268    let mut conflict_start = None;
269    for line in input.split_inclusive(|b| *b == b'\n') {
270        if line == CONFLICT_START_LINE {
271            conflict_start = Some(pos);
272        } else if conflict_start.is_some() && line == CONFLICT_END_LINE {
273            let conflict_body = &input[conflict_start.unwrap() + CONFLICT_START_LINE.len()..pos];
274            let hunk = parse_conflict_hunk(conflict_body);
275            match &hunk {
276                MergeHunk::Conflict(ConflictHunk { removes, adds })
277                    if removes.len() == num_removes && adds.len() == num_adds =>
278                {
279                    let resolved_slice = &input[resolved_start..conflict_start.unwrap()];
280                    if !resolved_slice.is_empty() {
281                        hunks.push(MergeHunk::Resolved(resolved_slice.to_vec()));
282                    }
283                    hunks.push(hunk);
284                    resolved_start = pos + line.len();
285                }
286                _ => {}
287            }
288            conflict_start = None;
289        }
290        pos += line.len();
291    }
292
293    if hunks.is_empty() {
294        None
295    } else {
296        if resolved_start < input.len() {
297            hunks.push(MergeHunk::Resolved(input[resolved_start..].to_vec()));
298        }
299        Some(hunks)
300    }
301}
302
303fn parse_conflict_hunk(input: &[u8]) -> MergeHunk {
304    enum State {
305        Diff,
306        Minus,
307        Plus,
308        Unknown,
309    }
310    let mut state = State::Unknown;
311    let mut removes = vec![];
312    let mut adds = vec![];
313    for line in input.split_inclusive(|b| *b == b'\n') {
314        match line {
315            CONFLICT_DIFF_LINE => {
316                state = State::Diff;
317                removes.push(vec![]);
318                adds.push(vec![]);
319                continue;
320            }
321            CONFLICT_MINUS_LINE => {
322                state = State::Minus;
323                removes.push(vec![]);
324                continue;
325            }
326            CONFLICT_PLUS_LINE => {
327                state = State::Plus;
328                adds.push(vec![]);
329                continue;
330            }
331            _ => {}
332        };
333        match state {
334            State::Diff => {
335                if let Some(rest) = line.strip_prefix(b"-") {
336                    removes.last_mut().unwrap().extend_from_slice(rest);
337                } else if let Some(rest) = line.strip_prefix(b"+") {
338                    adds.last_mut().unwrap().extend_from_slice(rest);
339                } else if let Some(rest) = line.strip_prefix(b" ") {
340                    removes.last_mut().unwrap().extend_from_slice(rest);
341                    adds.last_mut().unwrap().extend_from_slice(rest);
342                } else {
343                    // Doesn't look like a conflict
344                    return MergeHunk::Resolved(vec![]);
345                }
346            }
347            State::Minus => {
348                removes.last_mut().unwrap().extend_from_slice(line);
349            }
350            State::Plus => {
351                adds.last_mut().unwrap().extend_from_slice(line);
352            }
353            State::Unknown => {
354                // Doesn't look like a conflict
355                return MergeHunk::Resolved(vec![]);
356            }
357        }
358    }
359
360    MergeHunk::Conflict(ConflictHunk { removes, adds })
361}
362
363/// Returns `None` if there are no conflict markers in `content`.
364pub fn update_conflict_from_content(
365    store: &Store,
366    path: &RepoPath,
367    conflict_id: &ConflictId,
368    content: &[u8],
369) -> BackendResult<Option<ConflictId>> {
370    let mut conflict = store.read_conflict(path, conflict_id)?;
371
372    // First check if the new content is unchanged compared to the old content. If
373    // it is, we don't need parse the content or write any new objects to the
374    // store. This is also a way of making sure that unchanged tree/file
375    // conflicts (for example) are not converted to regular files in the working
376    // copy.
377    let mut old_content = Vec::with_capacity(content.len());
378    materialize_conflict(store, path, &conflict, &mut old_content).unwrap();
379    if content == old_content {
380        return Ok(Some(conflict_id.clone()));
381    }
382
383    let mut removed_content = vec![vec![]; conflict.removes.len()];
384    let mut added_content = vec![vec![]; conflict.adds.len()];
385    if let Some(hunks) = parse_conflict(content, conflict.removes.len(), conflict.adds.len()) {
386        for hunk in hunks {
387            match hunk {
388                MergeHunk::Resolved(slice) => {
389                    for buf in &mut removed_content {
390                        buf.extend_from_slice(&slice);
391                    }
392                    for buf in &mut added_content {
393                        buf.extend_from_slice(&slice);
394                    }
395                }
396                MergeHunk::Conflict(ConflictHunk { removes, adds }) => {
397                    for (i, buf) in removes.iter().enumerate() {
398                        removed_content[i].extend_from_slice(buf);
399                    }
400                    for (i, buf) in adds.iter().enumerate() {
401                        added_content[i].extend_from_slice(buf);
402                    }
403                }
404            }
405        }
406        // Now write the new files contents we found by parsing the file
407        // with conflict markers. Update the Conflict object with the new
408        // FileIds.
409        for (i, buf) in removed_content.iter().enumerate() {
410            let file_id = store.write_file(path, &mut Cursor::new(buf))?;
411            if let TreeValue::File { id, executable: _ } = &mut conflict.removes[i].value {
412                *id = file_id;
413            } else {
414                // TODO: This can actually happen. We should check earlier
415                // that the we only attempt to parse the conflicts if it's a
416                // file-only conflict.
417                panic!("Found conflict markers in merge of non-files");
418            }
419        }
420        for (i, buf) in added_content.iter().enumerate() {
421            let file_id = store.write_file(path, &mut Cursor::new(buf))?;
422            if let TreeValue::File { id, executable: _ } = &mut conflict.adds[i].value {
423                *id = file_id;
424            } else {
425                panic!("Found conflict markers in merge of non-files");
426            }
427        }
428        let new_conflict_id = store.write_conflict(path, &conflict)?;
429        Ok(Some(new_conflict_id))
430    } else {
431        Ok(None)
432    }
433}