jj_lib/diff_presentation/
unified.rs

1// Copyright 2025 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Utilities to compute unified (Git-style) diffs of 2 sides
16
17use std::ops::Range;
18
19use bstr::BStr;
20use bstr::BString;
21use thiserror::Error;
22
23use super::DiffTokenType;
24use super::DiffTokenVec;
25use super::FileContent;
26use super::LineCompareMode;
27use super::diff_by_line;
28use super::file_content_for_diff;
29use super::unzip_diff_hunks_to_lines;
30use crate::backend::BackendError;
31use crate::conflicts::ConflictMaterializeOptions;
32use crate::conflicts::MaterializedTreeValue;
33use crate::conflicts::materialize_merge_result_to_bytes;
34use crate::diff::ContentDiff;
35use crate::diff::DiffHunkKind;
36use crate::merge::Diff;
37use crate::object_id::ObjectId as _;
38use crate::repo_path::RepoPath;
39
40#[derive(Clone, Debug)]
41pub struct GitDiffPart {
42    /// Octal mode string or `None` if the file is absent.
43    pub mode: Option<&'static str>,
44    pub hash: String,
45    pub content: FileContent<BString>,
46}
47
48#[derive(Debug, Error)]
49pub enum UnifiedDiffError {
50    #[error(transparent)]
51    Backend(#[from] BackendError),
52    #[error("Access denied to {path}")]
53    AccessDenied {
54        path: String,
55        source: Box<dyn std::error::Error + Send + Sync>,
56    },
57}
58
59pub fn git_diff_part(
60    path: &RepoPath,
61    value: MaterializedTreeValue,
62    materialize_options: &ConflictMaterializeOptions,
63) -> Result<GitDiffPart, UnifiedDiffError> {
64    const DUMMY_HASH: &str = "0000000000";
65    let mode;
66    let mut hash;
67    let content;
68    match value {
69        MaterializedTreeValue::Absent => {
70            return Ok(GitDiffPart {
71                mode: None,
72                hash: DUMMY_HASH.to_owned(),
73                content: FileContent {
74                    is_binary: false,
75                    contents: BString::default(),
76                },
77            });
78        }
79        MaterializedTreeValue::AccessDenied(err) => {
80            return Err(UnifiedDiffError::AccessDenied {
81                path: path.as_internal_file_string().to_owned(),
82                source: err,
83            });
84        }
85        MaterializedTreeValue::File(mut file) => {
86            mode = if file.executable { "100755" } else { "100644" };
87            hash = file.id.hex();
88            content = file_content_for_diff(path, &mut file, |content| content)?;
89        }
90        MaterializedTreeValue::Symlink { id, target } => {
91            mode = "120000";
92            hash = id.hex();
93            content = FileContent {
94                // Unix file paths can't contain null bytes.
95                is_binary: false,
96                contents: target.into(),
97            };
98        }
99        MaterializedTreeValue::GitSubmodule(id) => {
100            // TODO: What should we actually do here?
101            mode = "040000";
102            hash = id.hex();
103            content = FileContent {
104                is_binary: false,
105                contents: BString::default(),
106            };
107        }
108        MaterializedTreeValue::FileConflict(file) => {
109            mode = match file.executable {
110                Some(true) => "100755",
111                Some(false) | None => "100644",
112            };
113            hash = DUMMY_HASH.to_owned();
114            content = FileContent {
115                is_binary: false, // TODO: are we sure this is never binary?
116                contents: materialize_merge_result_to_bytes(
117                    &file.contents,
118                    &file.labels,
119                    materialize_options,
120                ),
121            };
122        }
123        MaterializedTreeValue::OtherConflict { id, labels } => {
124            mode = "100644";
125            hash = DUMMY_HASH.to_owned();
126            content = FileContent {
127                is_binary: false,
128                contents: id.describe(&labels).into(),
129            };
130        }
131        MaterializedTreeValue::Tree(_) => {
132            panic!("Unexpected tree in diff at path {path:?}");
133        }
134    }
135    hash.truncate(10);
136    Ok(GitDiffPart {
137        mode: Some(mode),
138        hash,
139        content,
140    })
141}
142
/// Classification of a single line within a unified diff hunk.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffLineType {
    /// Line shown as unchanged context.
    Context,
    /// Line present only on the left side.
    Removed,
    /// Line present only on the right side.
    Added,
}
149
150pub struct UnifiedDiffHunk<'content> {
151    pub left_line_range: Range<usize>,
152    pub right_line_range: Range<usize>,
153    pub lines: Vec<(DiffLineType, DiffTokenVec<'content>)>,
154}
155
156impl<'content> UnifiedDiffHunk<'content> {
157    fn extend_context_lines(&mut self, lines: impl IntoIterator<Item = &'content [u8]>) {
158        let old_len = self.lines.len();
159        self.lines.extend(lines.into_iter().map(|line| {
160            let tokens = vec![(DiffTokenType::Matching, line)];
161            (DiffLineType::Context, tokens)
162        }));
163        self.left_line_range.end += self.lines.len() - old_len;
164        self.right_line_range.end += self.lines.len() - old_len;
165    }
166
167    fn extend_removed_lines(&mut self, lines: impl IntoIterator<Item = DiffTokenVec<'content>>) {
168        let old_len = self.lines.len();
169        self.lines
170            .extend(lines.into_iter().map(|line| (DiffLineType::Removed, line)));
171        self.left_line_range.end += self.lines.len() - old_len;
172    }
173
174    fn extend_added_lines(&mut self, lines: impl IntoIterator<Item = DiffTokenVec<'content>>) {
175        let old_len = self.lines.len();
176        self.lines
177            .extend(lines.into_iter().map(|line| (DiffLineType::Added, line)));
178        self.right_line_range.end += self.lines.len() - old_len;
179    }
180}
181
182pub fn unified_diff_hunks<'content>(
183    contents: Diff<&'content BStr>,
184    context: usize,
185    options: LineCompareMode,
186) -> Vec<UnifiedDiffHunk<'content>> {
187    let mut hunks = vec![];
188    let mut current_hunk = UnifiedDiffHunk {
189        left_line_range: 0..0,
190        right_line_range: 0..0,
191        lines: vec![],
192    };
193    let diff = diff_by_line(contents.into_array(), &options);
194    let mut diff_hunks = diff.hunks().peekable();
195    while let Some(hunk) = diff_hunks.next() {
196        match hunk.kind {
197            DiffHunkKind::Matching => {
198                // Just use the right (i.e. new) content. We could count the
199                // number of skipped lines separately, but the number of the
200                // context lines should match the displayed content.
201                let [_, right] = hunk.contents[..].try_into().unwrap();
202                let mut lines = right.split_inclusive(|b| *b == b'\n').fuse();
203                if !current_hunk.lines.is_empty() {
204                    // The previous hunk line should be either removed/added.
205                    current_hunk.extend_context_lines(lines.by_ref().take(context));
206                }
207                let before_lines = if diff_hunks.peek().is_some() {
208                    lines.by_ref().rev().take(context).collect()
209                } else {
210                    vec![] // No more hunks
211                };
212                let num_skip_lines = lines.count();
213                if num_skip_lines > 0 {
214                    let left_start = current_hunk.left_line_range.end + num_skip_lines;
215                    let right_start = current_hunk.right_line_range.end + num_skip_lines;
216                    if !current_hunk.lines.is_empty() {
217                        hunks.push(current_hunk);
218                    }
219                    current_hunk = UnifiedDiffHunk {
220                        left_line_range: left_start..left_start,
221                        right_line_range: right_start..right_start,
222                        lines: vec![],
223                    };
224                }
225                // The next hunk should be of DiffHunk::Different type if any.
226                current_hunk.extend_context_lines(before_lines.into_iter().rev());
227            }
228            DiffHunkKind::Different => {
229                let lines = unzip_diff_hunks_to_lines(ContentDiff::by_word(hunk.contents).hunks());
230                current_hunk.extend_removed_lines(lines.before);
231                current_hunk.extend_added_lines(lines.after);
232            }
233        }
234    }
235    if !current_hunk.lines.is_empty() {
236        hunks.push(current_hunk);
237    }
238    hunks
239}