jj_lib/diff_presentation/
mod.rs

1// Copyright 2025 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Utilities to present file diffs to the user
16
17#![expect(missing_docs)]
18
19use std::borrow::Borrow;
20use std::mem;
21
22use bstr::BString;
23use itertools::Itertools as _;
24use pollster::FutureExt as _;
25
26use crate::backend::BackendResult;
27use crate::conflicts::MaterializedFileValue;
28use crate::diff::CompareBytesExactly;
29use crate::diff::CompareBytesIgnoreAllWhitespace;
30use crate::diff::CompareBytesIgnoreWhitespaceAmount;
31use crate::diff::ContentDiff;
32use crate::diff::DiffHunk;
33use crate::diff::DiffHunkKind;
34use crate::diff::find_line_ranges;
35use crate::merge::Merge;
36use crate::repo_path::RepoPath;
37
38pub mod unified;
39// TODO: colored_diffs utils should also be moved from `jj_cli::diff_utils` to
40// here.
41
/// Tag attached to each token produced when splitting diff hunks into lines.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffTokenType {
    /// The token was taken from a hunk whose sides are equal.
    Matching,
    /// The token was taken from a hunk whose sides differ.
    Different,
}

/// One line of diff output: a sequence of `(token type, token bytes)` pairs
/// borrowing from the diffed content.
type DiffTokenVec<'content> = Vec<(DiffTokenType, &'content [u8])>;
49
/// File contents paired with a guess about whether they are binary.
#[derive(Debug, Clone)]
pub struct FileContent<T> {
    /// `true` if this file is likely binary; `false` if it is likely text.
    pub is_binary: bool,
    /// The contents themselves, in whatever representation `T` provides.
    pub contents: T,
}
56
57impl FileContent<Merge<BString>> {
58    pub fn is_empty(&self) -> bool {
59        self.contents.as_resolved().is_some_and(|c| c.is_empty())
60    }
61}
62
63pub fn file_content_for_diff<T>(
64    path: &RepoPath,
65    file: &mut MaterializedFileValue,
66    map_resolved: impl FnOnce(BString) -> T,
67) -> BackendResult<FileContent<T>> {
68    // If this is a binary file, don't show the full contents.
69    // Determine whether it's binary by whether the first 8k bytes contain a null
70    // character; this is the same heuristic used by git as of writing: https://github.com/git/git/blob/eea0e59ffbed6e33d171ace5be13cde9faa41639/xdiff-interface.c#L192-L198
71    const PEEK_SIZE: usize = 8000;
72    // TODO: currently we look at the whole file, even though for binary files we
73    // only need to know the file size. To change that we'd have to extend all
74    // the data backends to support getting the length.
75    let contents = BString::new(file.read_all(path).block_on()?);
76    let start = &contents[..PEEK_SIZE.min(contents.len())];
77    Ok(FileContent {
78        is_binary: start.contains(&b'\0'),
79        contents: map_resolved(contents),
80    })
81}
82
/// Selects how lines are compared when diffing line by line.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum LineCompareMode {
    /// Lines are compared literally. This is the default mode.
    #[default]
    Exact,
    /// Lines are compared with every whitespace occurrence ignored.
    IgnoreAllSpace,
    /// Lines are compared with changes in the amount of whitespace ignored.
    IgnoreSpaceChange,
}
93
94pub fn diff_by_line<'input, T: AsRef<[u8]> + ?Sized + 'input>(
95    inputs: impl IntoIterator<Item = &'input T>,
96    options: &LineCompareMode,
97) -> ContentDiff<'input> {
98    // TODO: If we add --ignore-blank-lines, its tokenizer will have to attach
99    // blank lines to the preceding range. Maybe it can also be implemented as a
100    // post-process (similar to refine_changed_regions()) that expands unchanged
101    // regions across blank lines.
102    match options {
103        LineCompareMode::Exact => {
104            ContentDiff::for_tokenizer(inputs, find_line_ranges, CompareBytesExactly)
105        }
106        LineCompareMode::IgnoreAllSpace => {
107            ContentDiff::for_tokenizer(inputs, find_line_ranges, CompareBytesIgnoreAllWhitespace)
108        }
109        LineCompareMode::IgnoreSpaceChange => {
110            ContentDiff::for_tokenizer(inputs, find_line_ranges, CompareBytesIgnoreWhitespaceAmount)
111        }
112    }
113}
114
115/// Splits `[left, right]` hunk pairs into `[left_lines, right_lines]`.
116pub fn unzip_diff_hunks_to_lines<'content, I>(diff_hunks: I) -> [Vec<DiffTokenVec<'content>>; 2]
117where
118    I: IntoIterator,
119    I::Item: Borrow<DiffHunk<'content>>,
120{
121    let mut left_lines: Vec<DiffTokenVec<'content>> = vec![];
122    let mut right_lines: Vec<DiffTokenVec<'content>> = vec![];
123    let mut left_tokens: DiffTokenVec<'content> = vec![];
124    let mut right_tokens: DiffTokenVec<'content> = vec![];
125
126    for hunk in diff_hunks {
127        let hunk = hunk.borrow();
128        match hunk.kind {
129            DiffHunkKind::Matching => {
130                // TODO: add support for unmatched contexts
131                debug_assert!(hunk.contents.iter().all_equal());
132                for token in hunk.contents[0].split_inclusive(|b| *b == b'\n') {
133                    left_tokens.push((DiffTokenType::Matching, token));
134                    right_tokens.push((DiffTokenType::Matching, token));
135                    if token.ends_with(b"\n") {
136                        left_lines.push(mem::take(&mut left_tokens));
137                        right_lines.push(mem::take(&mut right_tokens));
138                    }
139                }
140            }
141            DiffHunkKind::Different => {
142                let [left, right] = hunk.contents[..]
143                    .try_into()
144                    .expect("hunk should have exactly two inputs");
145                for token in left.split_inclusive(|b| *b == b'\n') {
146                    left_tokens.push((DiffTokenType::Different, token));
147                    if token.ends_with(b"\n") {
148                        left_lines.push(mem::take(&mut left_tokens));
149                    }
150                }
151                for token in right.split_inclusive(|b| *b == b'\n') {
152                    right_tokens.push((DiffTokenType::Different, token));
153                    if token.ends_with(b"\n") {
154                        right_lines.push(mem::take(&mut right_tokens));
155                    }
156                }
157            }
158        }
159    }
160
161    if !left_tokens.is_empty() {
162        left_lines.push(left_tokens);
163    }
164    if !right_tokens.is_empty() {
165        right_lines.push(right_tokens);
166    }
167    [left_lines, right_lines]
168}