jj_lib/diff_presentation/
mod.rs

1// Copyright 2025 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Utilities to present file diffs to the user
16
17#![expect(missing_docs)]
18
19use std::borrow::Borrow;
20use std::mem;
21
22use bstr::BString;
23use itertools::Itertools as _;
24use pollster::FutureExt as _;
25
26use crate::backend::BackendResult;
27use crate::conflicts::MaterializedFileValue;
28use crate::diff::CompareBytesExactly;
29use crate::diff::CompareBytesIgnoreAllWhitespace;
30use crate::diff::CompareBytesIgnoreWhitespaceAmount;
31use crate::diff::ContentDiff;
32use crate::diff::DiffHunk;
33use crate::diff::DiffHunkKind;
34use crate::diff::find_line_ranges;
35use crate::merge::Diff;
36use crate::merge::Merge;
37use crate::repo_path::RepoPath;
38
39pub mod unified;
40// TODO: colored_diffs utils should also be moved from `jj_cli::diff_utils` to
41// here.
42
/// Classification attached to each token of a diffed line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffTokenType {
    /// The token was taken from a hunk whose contents match on every side.
    Matching,
    /// The token was taken from a hunk whose contents differ between sides.
    Different,
}
48
49type DiffTokenVec<'content> = Vec<(DiffTokenType, &'content [u8])>;
50
/// File contents of type `T` paired with a text/binary classification.
#[derive(Debug, Clone)]
pub struct FileContent<T> {
    /// `true` if this file is likely binary; `false` if it is likely text.
    pub is_binary: bool,
    /// The contents of the file, in whatever representation the caller chose.
    pub contents: T,
}
57
58impl FileContent<Merge<BString>> {
59    pub fn is_empty(&self) -> bool {
60        self.contents.as_resolved().is_some_and(|c| c.is_empty())
61    }
62}
63
64pub fn file_content_for_diff<T>(
65    path: &RepoPath,
66    file: &mut MaterializedFileValue,
67    map_resolved: impl FnOnce(BString) -> T,
68) -> BackendResult<FileContent<T>> {
69    // If this is a binary file, don't show the full contents.
70    // Determine whether it's binary by whether the first 8k bytes contain a null
71    // character; this is the same heuristic used by git as of writing: https://github.com/git/git/blob/eea0e59ffbed6e33d171ace5be13cde9faa41639/xdiff-interface.c#L192-L198
72    const PEEK_SIZE: usize = 8000;
73    // TODO: currently we look at the whole file, even though for binary files we
74    // only need to know the file size. To change that we'd have to extend all
75    // the data backends to support getting the length.
76    let contents = BString::new(file.read_all(path).block_on()?);
77    let start = &contents[..PEEK_SIZE.min(contents.len())];
78    Ok(FileContent {
79        is_binary: start.contains(&b'\0'),
80        contents: map_resolved(contents),
81    })
82}
83
/// Selects how lines are compared when diffing by line.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum LineCompareMode {
    /// Lines are compared literally. This is the default mode.
    #[default]
    Exact,
    /// Lines are compared with every whitespace occurrence ignored.
    IgnoreAllSpace,
    /// Lines are compared with changes in the amount of whitespace ignored.
    IgnoreSpaceChange,
}
94
95pub fn diff_by_line<'input, T: AsRef<[u8]> + ?Sized + 'input>(
96    inputs: impl IntoIterator<Item = &'input T>,
97    options: &LineCompareMode,
98) -> ContentDiff<'input> {
99    // TODO: If we add --ignore-blank-lines, its tokenizer will have to attach
100    // blank lines to the preceding range. Maybe it can also be implemented as a
101    // post-process (similar to refine_changed_regions()) that expands unchanged
102    // regions across blank lines.
103    match options {
104        LineCompareMode::Exact => {
105            ContentDiff::for_tokenizer(inputs, find_line_ranges, CompareBytesExactly)
106        }
107        LineCompareMode::IgnoreAllSpace => {
108            ContentDiff::for_tokenizer(inputs, find_line_ranges, CompareBytesIgnoreAllWhitespace)
109        }
110        LineCompareMode::IgnoreSpaceChange => {
111            ContentDiff::for_tokenizer(inputs, find_line_ranges, CompareBytesIgnoreWhitespaceAmount)
112        }
113    }
114}
115
116/// Splits `[left, right]` hunk pairs into `[left_lines, right_lines]`.
117pub fn unzip_diff_hunks_to_lines<'content, I>(diff_hunks: I) -> Diff<Vec<DiffTokenVec<'content>>>
118where
119    I: IntoIterator,
120    I::Item: Borrow<DiffHunk<'content>>,
121{
122    let mut left_lines: Vec<DiffTokenVec<'content>> = vec![];
123    let mut right_lines: Vec<DiffTokenVec<'content>> = vec![];
124    let mut left_tokens: DiffTokenVec<'content> = vec![];
125    let mut right_tokens: DiffTokenVec<'content> = vec![];
126
127    for hunk in diff_hunks {
128        let hunk = hunk.borrow();
129        match hunk.kind {
130            DiffHunkKind::Matching => {
131                // TODO: add support for unmatched contexts
132                debug_assert!(hunk.contents.iter().all_equal());
133                for token in hunk.contents[0].split_inclusive(|b| *b == b'\n') {
134                    left_tokens.push((DiffTokenType::Matching, token));
135                    right_tokens.push((DiffTokenType::Matching, token));
136                    if token.ends_with(b"\n") {
137                        left_lines.push(mem::take(&mut left_tokens));
138                        right_lines.push(mem::take(&mut right_tokens));
139                    }
140                }
141            }
142            DiffHunkKind::Different => {
143                let [left, right] = hunk.contents[..]
144                    .try_into()
145                    .expect("hunk should have exactly two inputs");
146                for token in left.split_inclusive(|b| *b == b'\n') {
147                    left_tokens.push((DiffTokenType::Different, token));
148                    if token.ends_with(b"\n") {
149                        left_lines.push(mem::take(&mut left_tokens));
150                    }
151                }
152                for token in right.split_inclusive(|b| *b == b'\n') {
153                    right_tokens.push((DiffTokenType::Different, token));
154                    if token.ends_with(b"\n") {
155                        right_lines.push(mem::take(&mut right_tokens));
156                    }
157                }
158            }
159        }
160    }
161
162    if !left_tokens.is_empty() {
163        left_lines.push(left_tokens);
164    }
165    if !right_tokens.is_empty() {
166        right_lines.push(right_tokens);
167    }
168    Diff::new(left_lines, right_lines)
169}