jj_lib/diff_presentation/
unified.rs

1// Copyright 2025 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Utilities to compute unified (Git-style) diffs of 2 sides
16
17use std::ops::Range;
18
19use bstr::BStr;
20use bstr::BString;
21use thiserror::Error;
22
23use super::DiffTokenType;
24use super::DiffTokenVec;
25use super::FileContent;
26use super::LineCompareMode;
27use super::diff_by_line;
28use super::file_content_for_diff;
29use super::unzip_diff_hunks_to_lines;
30use crate::backend::BackendError;
31use crate::conflicts::ConflictMaterializeOptions;
32use crate::conflicts::MaterializedTreeValue;
33use crate::conflicts::materialize_merge_result_to_bytes;
34use crate::diff::ContentDiff;
35use crate::diff::DiffHunkKind;
36use crate::object_id::ObjectId as _;
37use crate::repo_path::RepoPath;
38
39#[derive(Clone, Debug)]
40pub struct GitDiffPart {
41    /// Octal mode string or `None` if the file is absent.
42    pub mode: Option<&'static str>,
43    pub hash: String,
44    pub content: FileContent<BString>,
45}
46
47#[derive(Debug, Error)]
48pub enum UnifiedDiffError {
49    #[error(transparent)]
50    Backend(#[from] BackendError),
51    #[error("Access denied to {path}")]
52    AccessDenied {
53        path: String,
54        source: Box<dyn std::error::Error + Send + Sync>,
55    },
56}
57
58pub fn git_diff_part(
59    path: &RepoPath,
60    value: MaterializedTreeValue,
61    materialize_options: &ConflictMaterializeOptions,
62) -> Result<GitDiffPart, UnifiedDiffError> {
63    const DUMMY_HASH: &str = "0000000000";
64    let mode;
65    let mut hash;
66    let content;
67    match value {
68        MaterializedTreeValue::Absent => {
69            return Ok(GitDiffPart {
70                mode: None,
71                hash: DUMMY_HASH.to_owned(),
72                content: FileContent {
73                    is_binary: false,
74                    contents: BString::default(),
75                },
76            });
77        }
78        MaterializedTreeValue::AccessDenied(err) => {
79            return Err(UnifiedDiffError::AccessDenied {
80                path: path.as_internal_file_string().to_owned(),
81                source: err,
82            });
83        }
84        MaterializedTreeValue::File(mut file) => {
85            mode = if file.executable { "100755" } else { "100644" };
86            hash = file.id.hex();
87            content = file_content_for_diff(path, &mut file, |content| content)?;
88        }
89        MaterializedTreeValue::Symlink { id, target } => {
90            mode = "120000";
91            hash = id.hex();
92            content = FileContent {
93                // Unix file paths can't contain null bytes.
94                is_binary: false,
95                contents: target.into(),
96            };
97        }
98        MaterializedTreeValue::GitSubmodule(id) => {
99            // TODO: What should we actually do here?
100            mode = "040000";
101            hash = id.hex();
102            content = FileContent {
103                is_binary: false,
104                contents: BString::default(),
105            };
106        }
107        MaterializedTreeValue::FileConflict(file) => {
108            mode = match file.executable {
109                Some(true) => "100755",
110                Some(false) | None => "100644",
111            };
112            hash = DUMMY_HASH.to_owned();
113            content = FileContent {
114                is_binary: false, // TODO: are we sure this is never binary?
115                contents: materialize_merge_result_to_bytes(&file.contents, materialize_options),
116            };
117        }
118        MaterializedTreeValue::OtherConflict { id } => {
119            mode = "100644";
120            hash = DUMMY_HASH.to_owned();
121            content = FileContent {
122                is_binary: false,
123                contents: id.describe().into(),
124            };
125        }
126        MaterializedTreeValue::Tree(_) => {
127            panic!("Unexpected tree in diff at path {path:?}");
128        }
129    }
130    hash.truncate(10);
131    Ok(GitDiffPart {
132        mode: Some(mode),
133        hash,
134        content,
135    })
136}
137
/// How a single line of a unified diff hunk relates to the two sides.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffLineType {
    /// Unchanged line shown around a change.
    Context,
    /// Line that exists only on the left side.
    Removed,
    /// Line that exists only on the right side.
    Added,
}
144
145pub struct UnifiedDiffHunk<'content> {
146    pub left_line_range: Range<usize>,
147    pub right_line_range: Range<usize>,
148    pub lines: Vec<(DiffLineType, DiffTokenVec<'content>)>,
149}
150
151impl<'content> UnifiedDiffHunk<'content> {
152    fn extend_context_lines(&mut self, lines: impl IntoIterator<Item = &'content [u8]>) {
153        let old_len = self.lines.len();
154        self.lines.extend(lines.into_iter().map(|line| {
155            let tokens = vec![(DiffTokenType::Matching, line)];
156            (DiffLineType::Context, tokens)
157        }));
158        self.left_line_range.end += self.lines.len() - old_len;
159        self.right_line_range.end += self.lines.len() - old_len;
160    }
161
162    fn extend_removed_lines(&mut self, lines: impl IntoIterator<Item = DiffTokenVec<'content>>) {
163        let old_len = self.lines.len();
164        self.lines
165            .extend(lines.into_iter().map(|line| (DiffLineType::Removed, line)));
166        self.left_line_range.end += self.lines.len() - old_len;
167    }
168
169    fn extend_added_lines(&mut self, lines: impl IntoIterator<Item = DiffTokenVec<'content>>) {
170        let old_len = self.lines.len();
171        self.lines
172            .extend(lines.into_iter().map(|line| (DiffLineType::Added, line)));
173        self.right_line_range.end += self.lines.len() - old_len;
174    }
175}
176
177pub fn unified_diff_hunks<'content>(
178    contents: [&'content BStr; 2],
179    context: usize,
180    options: LineCompareMode,
181) -> Vec<UnifiedDiffHunk<'content>> {
182    let mut hunks = vec![];
183    let mut current_hunk = UnifiedDiffHunk {
184        left_line_range: 0..0,
185        right_line_range: 0..0,
186        lines: vec![],
187    };
188    let diff = diff_by_line(contents, &options);
189    let mut diff_hunks = diff.hunks().peekable();
190    while let Some(hunk) = diff_hunks.next() {
191        match hunk.kind {
192            DiffHunkKind::Matching => {
193                // Just use the right (i.e. new) content. We could count the
194                // number of skipped lines separately, but the number of the
195                // context lines should match the displayed content.
196                let [_, right] = hunk.contents[..].try_into().unwrap();
197                let mut lines = right.split_inclusive(|b| *b == b'\n').fuse();
198                if !current_hunk.lines.is_empty() {
199                    // The previous hunk line should be either removed/added.
200                    current_hunk.extend_context_lines(lines.by_ref().take(context));
201                }
202                let before_lines = if diff_hunks.peek().is_some() {
203                    lines.by_ref().rev().take(context).collect()
204                } else {
205                    vec![] // No more hunks
206                };
207                let num_skip_lines = lines.count();
208                if num_skip_lines > 0 {
209                    let left_start = current_hunk.left_line_range.end + num_skip_lines;
210                    let right_start = current_hunk.right_line_range.end + num_skip_lines;
211                    if !current_hunk.lines.is_empty() {
212                        hunks.push(current_hunk);
213                    }
214                    current_hunk = UnifiedDiffHunk {
215                        left_line_range: left_start..left_start,
216                        right_line_range: right_start..right_start,
217                        lines: vec![],
218                    };
219                }
220                // The next hunk should be of DiffHunk::Different type if any.
221                current_hunk.extend_context_lines(before_lines.into_iter().rev());
222            }
223            DiffHunkKind::Different => {
224                let [left_lines, right_lines] =
225                    unzip_diff_hunks_to_lines(ContentDiff::by_word(hunk.contents).hunks());
226                current_hunk.extend_removed_lines(left_lines);
227                current_hunk.extend_added_lines(right_lines);
228            }
229        }
230    }
231    if !current_hunk.lines.is_empty() {
232        hunks.push(current_hunk);
233    }
234    hunks
235}