jj_lib/diff_presentation/
unified.rs

1// Copyright 2025 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Utilities to compute unified (Git-style) diffs of 2 sides
16
17use std::ops::Range;
18
19use bstr::BStr;
20use bstr::BString;
21use thiserror::Error;
22
23use super::DiffTokenType;
24use super::DiffTokenVec;
25use super::FileContent;
26use super::LineCompareMode;
27use super::diff_by_line;
28use super::file_content_for_diff;
29use super::unzip_diff_hunks_to_lines;
30use crate::backend::BackendError;
31use crate::conflicts::ConflictMaterializeOptions;
32use crate::conflicts::MaterializedTreeValue;
33use crate::conflicts::materialize_merge_result_to_bytes;
34use crate::diff::ContentDiff;
35use crate::diff::DiffHunkKind;
36use crate::merge::Diff;
37use crate::object_id::ObjectId as _;
38use crate::repo_path::RepoPath;
39
40#[derive(Clone, Debug)]
41pub struct GitDiffPart {
42    /// Octal mode string or `None` if the file is absent.
43    pub mode: Option<&'static str>,
44    pub hash: String,
45    pub content: FileContent<BString>,
46}
47
48#[derive(Debug, Error)]
49pub enum UnifiedDiffError {
50    #[error(transparent)]
51    Backend(#[from] BackendError),
52    #[error("Access denied to {path}")]
53    AccessDenied {
54        path: String,
55        source: Box<dyn std::error::Error + Send + Sync>,
56    },
57}
58
59pub fn git_diff_part(
60    path: &RepoPath,
61    value: MaterializedTreeValue,
62    materialize_options: &ConflictMaterializeOptions,
63) -> Result<GitDiffPart, UnifiedDiffError> {
64    const DUMMY_HASH: &str = "0000000000";
65    let mode;
66    let mut hash;
67    let content;
68    match value {
69        MaterializedTreeValue::Absent => {
70            return Ok(GitDiffPart {
71                mode: None,
72                hash: DUMMY_HASH.to_owned(),
73                content: FileContent {
74                    is_binary: false,
75                    contents: BString::default(),
76                },
77            });
78        }
79        MaterializedTreeValue::AccessDenied(err) => {
80            return Err(UnifiedDiffError::AccessDenied {
81                path: path.as_internal_file_string().to_owned(),
82                source: err,
83            });
84        }
85        MaterializedTreeValue::File(mut file) => {
86            mode = if file.executable { "100755" } else { "100644" };
87            hash = file.id.hex();
88            content = file_content_for_diff(path, &mut file, |content| content)?;
89        }
90        MaterializedTreeValue::Symlink { id, target } => {
91            mode = "120000";
92            hash = id.hex();
93            content = FileContent {
94                // Unix file paths can't contain null bytes.
95                is_binary: false,
96                contents: target.into(),
97            };
98        }
99        MaterializedTreeValue::GitSubmodule(id) => {
100            // TODO: What should we actually do here?
101            mode = "040000";
102            hash = id.hex();
103            content = FileContent {
104                is_binary: false,
105                contents: BString::default(),
106            };
107        }
108        MaterializedTreeValue::FileConflict(file) => {
109            mode = match file.executable {
110                Some(true) => "100755",
111                Some(false) | None => "100644",
112            };
113            hash = DUMMY_HASH.to_owned();
114            content = FileContent {
115                is_binary: false, // TODO: are we sure this is never binary?
116                contents: materialize_merge_result_to_bytes(&file.contents, materialize_options),
117            };
118        }
119        MaterializedTreeValue::OtherConflict { id } => {
120            mode = "100644";
121            hash = DUMMY_HASH.to_owned();
122            content = FileContent {
123                is_binary: false,
124                contents: id.describe().into(),
125            };
126        }
127        MaterializedTreeValue::Tree(_) => {
128            panic!("Unexpected tree in diff at path {path:?}");
129        }
130    }
131    hash.truncate(10);
132    Ok(GitDiffPart {
133        mode: Some(mode),
134        hash,
135        content,
136    })
137}
138
/// Role of a single line within a unified diff hunk.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffLineType {
    /// Line present on both sides, shown around a change.
    Context,
    /// Line that exists only on the left side.
    Removed,
    /// Line that exists only on the right side.
    Added,
}
145
/// A run of diff lines (context, removed, and added) that is presented
/// together as one hunk of a unified diff.
pub struct UnifiedDiffHunk<'content> {
    /// Half-open range of line offsets covered on the left side. Its end
    /// advances for every context and removed line added to the hunk.
    pub left_line_range: Range<usize>,
    /// Half-open range of line offsets covered on the right side. Its end
    /// advances for every context and added line added to the hunk.
    pub right_line_range: Range<usize>,
    /// Lines of the hunk in the order they were appended, each tagged with
    /// its type and carrying the tokens that make up the line.
    pub lines: Vec<(DiffLineType, DiffTokenVec<'content>)>,
}
151
152impl<'content> UnifiedDiffHunk<'content> {
153    fn extend_context_lines(&mut self, lines: impl IntoIterator<Item = &'content [u8]>) {
154        let old_len = self.lines.len();
155        self.lines.extend(lines.into_iter().map(|line| {
156            let tokens = vec![(DiffTokenType::Matching, line)];
157            (DiffLineType::Context, tokens)
158        }));
159        self.left_line_range.end += self.lines.len() - old_len;
160        self.right_line_range.end += self.lines.len() - old_len;
161    }
162
163    fn extend_removed_lines(&mut self, lines: impl IntoIterator<Item = DiffTokenVec<'content>>) {
164        let old_len = self.lines.len();
165        self.lines
166            .extend(lines.into_iter().map(|line| (DiffLineType::Removed, line)));
167        self.left_line_range.end += self.lines.len() - old_len;
168    }
169
170    fn extend_added_lines(&mut self, lines: impl IntoIterator<Item = DiffTokenVec<'content>>) {
171        let old_len = self.lines.len();
172        self.lines
173            .extend(lines.into_iter().map(|line| (DiffLineType::Added, line)));
174        self.right_line_range.end += self.lines.len() - old_len;
175    }
176}
177
178pub fn unified_diff_hunks<'content>(
179    contents: Diff<&'content BStr>,
180    context: usize,
181    options: LineCompareMode,
182) -> Vec<UnifiedDiffHunk<'content>> {
183    let mut hunks = vec![];
184    let mut current_hunk = UnifiedDiffHunk {
185        left_line_range: 0..0,
186        right_line_range: 0..0,
187        lines: vec![],
188    };
189    let diff = diff_by_line(contents.into_array(), &options);
190    let mut diff_hunks = diff.hunks().peekable();
191    while let Some(hunk) = diff_hunks.next() {
192        match hunk.kind {
193            DiffHunkKind::Matching => {
194                // Just use the right (i.e. new) content. We could count the
195                // number of skipped lines separately, but the number of the
196                // context lines should match the displayed content.
197                let [_, right] = hunk.contents[..].try_into().unwrap();
198                let mut lines = right.split_inclusive(|b| *b == b'\n').fuse();
199                if !current_hunk.lines.is_empty() {
200                    // The previous hunk line should be either removed/added.
201                    current_hunk.extend_context_lines(lines.by_ref().take(context));
202                }
203                let before_lines = if diff_hunks.peek().is_some() {
204                    lines.by_ref().rev().take(context).collect()
205                } else {
206                    vec![] // No more hunks
207                };
208                let num_skip_lines = lines.count();
209                if num_skip_lines > 0 {
210                    let left_start = current_hunk.left_line_range.end + num_skip_lines;
211                    let right_start = current_hunk.right_line_range.end + num_skip_lines;
212                    if !current_hunk.lines.is_empty() {
213                        hunks.push(current_hunk);
214                    }
215                    current_hunk = UnifiedDiffHunk {
216                        left_line_range: left_start..left_start,
217                        right_line_range: right_start..right_start,
218                        lines: vec![],
219                    };
220                }
221                // The next hunk should be of DiffHunk::Different type if any.
222                current_hunk.extend_context_lines(before_lines.into_iter().rev());
223            }
224            DiffHunkKind::Different => {
225                let lines = unzip_diff_hunks_to_lines(ContentDiff::by_word(hunk.contents).hunks());
226                current_hunk.extend_removed_lines(lines.before);
227                current_hunk.extend_added_lines(lines.after);
228            }
229        }
230    }
231    if !current_hunk.lines.is_empty() {
232        hunks.push(current_hunk);
233    }
234    hunks
235}