Skip to main content

jj_lib/diff_presentation/
mod.rs

1// Copyright 2025 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Utilities to present file diffs to the user
16
17#![expect(missing_docs)]
18
19use std::borrow::Borrow;
20use std::mem;
21
22use bstr::BString;
23use itertools::Itertools as _;
24
25use crate::backend::BackendResult;
26use crate::conflicts::MaterializedFileValue;
27use crate::diff::CompareBytesExactly;
28use crate::diff::CompareBytesIgnoreAllWhitespace;
29use crate::diff::CompareBytesIgnoreWhitespaceAmount;
30use crate::diff::ContentDiff;
31use crate::diff::DiffHunk;
32use crate::diff::DiffHunkKind;
33use crate::diff::find_line_ranges;
34use crate::merge::Diff;
35use crate::repo_path::RepoPath;
36
37pub mod unified;
38// TODO: colored_diffs utils should also be moved from `jj_cli::diff_utils` to
39// here.
40
/// Tag attached to each token of a presented diff.
///
/// Says whether the token came from a part where the compared sides agree or
/// from a part where they differ.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffTokenType {
    /// The token belongs to content that matches between the sides.
    Matching,
    /// The token belongs to content that differs between the sides.
    Different,
}
46
/// Sequence of `(type, bytes)` tokens borrowed from the diffed content.
///
/// `unzip_diff_hunks_to_lines()` uses one such vector per output line.
type DiffTokenVec<'content> = Vec<(DiffTokenType, &'content [u8])>;
48
/// File contents paired with a text/binary classification.
#[derive(Debug, Clone)]
pub struct FileContent<T> {
    /// `true` if the file is likely binary, `false` if it is likely text.
    pub is_binary: bool,
    // The payload, in whatever representation `T` the producer chose.
    // (Kept as a plain comment: this module still declares
    // `#![expect(missing_docs)]`.)
    pub contents: T,
}
55
56pub async fn file_content_for_diff<T>(
57    path: &RepoPath,
58    file: &mut MaterializedFileValue,
59    map_resolved: impl FnOnce(BString) -> T,
60) -> BackendResult<FileContent<T>> {
61    // If this is a binary file, don't show the full contents.
62    // Determine whether it's binary by whether the first 8k bytes contain a null
63    // character; this is the same heuristic used by git as of writing: https://github.com/git/git/blob/eea0e59ffbed6e33d171ace5be13cde9faa41639/xdiff-interface.c#L192-L198
64    const PEEK_SIZE: usize = 8000;
65    // TODO: currently we look at the whole file, even though for binary files we
66    // only need to know the file size. To change that we'd have to extend all
67    // the data backends to support getting the length.
68    let contents = BString::new(file.read_all(path).await?);
69    let start = &contents[..PEEK_SIZE.min(contents.len())];
70    Ok(FileContent {
71        is_binary: start.contains(&b'\0'),
72        contents: map_resolved(contents),
73    })
74}
75
/// How two lines are compared when diffing by line.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum LineCompareMode {
    /// Lines are compared literally. This is the default mode.
    #[default]
    Exact,
    /// Lines are compared with every whitespace occurrence ignored.
    IgnoreAllSpace,
    /// Lines are compared with changes in the amount of whitespace ignored.
    IgnoreSpaceChange,
}
86
87pub fn diff_by_line<'input, T: AsRef<[u8]> + ?Sized + 'input>(
88    inputs: impl IntoIterator<Item = &'input T>,
89    options: &LineCompareMode,
90) -> ContentDiff<'input> {
91    // TODO: If we add --ignore-blank-lines, its tokenizer will have to attach
92    // blank lines to the preceding range. Maybe it can also be implemented as a
93    // post-process (similar to refine_changed_regions()) that expands unchanged
94    // regions across blank lines.
95    match options {
96        LineCompareMode::Exact => {
97            ContentDiff::for_tokenizer(inputs, find_line_ranges, CompareBytesExactly)
98        }
99        LineCompareMode::IgnoreAllSpace => {
100            ContentDiff::for_tokenizer(inputs, find_line_ranges, CompareBytesIgnoreAllWhitespace)
101        }
102        LineCompareMode::IgnoreSpaceChange => {
103            ContentDiff::for_tokenizer(inputs, find_line_ranges, CompareBytesIgnoreWhitespaceAmount)
104        }
105    }
106}
107
108/// Splits `[left, right]` hunk pairs into `[left_lines, right_lines]`.
109pub fn unzip_diff_hunks_to_lines<'content, I>(diff_hunks: I) -> Diff<Vec<DiffTokenVec<'content>>>
110where
111    I: IntoIterator,
112    I::Item: Borrow<DiffHunk<'content>>,
113{
114    let mut left_lines: Vec<DiffTokenVec<'content>> = vec![];
115    let mut right_lines: Vec<DiffTokenVec<'content>> = vec![];
116    let mut left_tokens: DiffTokenVec<'content> = vec![];
117    let mut right_tokens: DiffTokenVec<'content> = vec![];
118
119    for hunk in diff_hunks {
120        let hunk = hunk.borrow();
121        match hunk.kind {
122            DiffHunkKind::Matching => {
123                // TODO: add support for unmatched contexts
124                debug_assert!(hunk.contents.iter().all_equal());
125                for token in hunk.contents[0].split_inclusive(|b| *b == b'\n') {
126                    left_tokens.push((DiffTokenType::Matching, token));
127                    right_tokens.push((DiffTokenType::Matching, token));
128                    if token.ends_with(b"\n") {
129                        left_lines.push(mem::take(&mut left_tokens));
130                        right_lines.push(mem::take(&mut right_tokens));
131                    }
132                }
133            }
134            DiffHunkKind::Different => {
135                let [left, right] = hunk.contents[..]
136                    .try_into()
137                    .expect("hunk should have exactly two inputs");
138                for token in left.split_inclusive(|b| *b == b'\n') {
139                    left_tokens.push((DiffTokenType::Different, token));
140                    if token.ends_with(b"\n") {
141                        left_lines.push(mem::take(&mut left_tokens));
142                    }
143                }
144                for token in right.split_inclusive(|b| *b == b'\n') {
145                    right_tokens.push((DiffTokenType::Different, token));
146                    if token.ends_with(b"\n") {
147                        right_lines.push(mem::take(&mut right_tokens));
148                    }
149                }
150            }
151        }
152    }
153
154    if !left_tokens.is_empty() {
155        left_lines.push(left_tokens);
156    }
157    if !right_tokens.is_empty() {
158        right_lines.push(right_tokens);
159    }
160    Diff::new(left_lines, right_lines)
161}