jj_lib/diff_presentation/
mod.rs

1// Copyright 2025 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Utilities to present file diffs to the user
16
17#![expect(missing_docs)]
18
19use std::borrow::Borrow;
20use std::mem;
21
22use bstr::BString;
23use itertools::Itertools as _;
24use pollster::FutureExt as _;
25
26use crate::backend::BackendResult;
27use crate::conflicts::MaterializedFileValue;
28use crate::diff::CompareBytesExactly;
29use crate::diff::CompareBytesIgnoreAllWhitespace;
30use crate::diff::CompareBytesIgnoreWhitespaceAmount;
31use crate::diff::ContentDiff;
32use crate::diff::DiffHunk;
33use crate::diff::DiffHunkKind;
34use crate::diff::find_line_ranges;
35use crate::merge::Diff;
36use crate::repo_path::RepoPath;
37
38pub mod unified;
39// TODO: colored_diffs utils should also be moved from `jj_cli::diff_utils` to
40// here.
41
/// Classifies a diff token by the kind of hunk it was taken from.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DiffTokenType {
    /// The token belongs to a hunk whose sides match.
    Matching,
    /// The token belongs to a hunk whose sides differ.
    Different,
}

/// A sequence of byte-slice tokens, each tagged with its [`DiffTokenType`].
type DiffTokenVec<'content> = Vec<(DiffTokenType, &'content [u8])>;
49
/// File contents together with a binary-or-text classification.
#[derive(Debug, Clone)]
pub struct FileContent<T> {
    /// `true` when the file is likely binary, `false` when it is likely text.
    pub is_binary: bool,
    /// The file's contents, represented as `T`.
    pub contents: T,
}
56
57pub fn file_content_for_diff<T>(
58    path: &RepoPath,
59    file: &mut MaterializedFileValue,
60    map_resolved: impl FnOnce(BString) -> T,
61) -> BackendResult<FileContent<T>> {
62    // If this is a binary file, don't show the full contents.
63    // Determine whether it's binary by whether the first 8k bytes contain a null
64    // character; this is the same heuristic used by git as of writing: https://github.com/git/git/blob/eea0e59ffbed6e33d171ace5be13cde9faa41639/xdiff-interface.c#L192-L198
65    const PEEK_SIZE: usize = 8000;
66    // TODO: currently we look at the whole file, even though for binary files we
67    // only need to know the file size. To change that we'd have to extend all
68    // the data backends to support getting the length.
69    let contents = BString::new(file.read_all(path).block_on()?);
70    let start = &contents[..PEEK_SIZE.min(contents.len())];
71    Ok(FileContent {
72        is_binary: start.contains(&b'\0'),
73        contents: map_resolved(contents),
74    })
75}
76
/// Selects how whitespace is treated when lines are compared.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum LineCompareMode {
    /// Lines are compared literally.
    #[default]
    Exact,
    /// Every occurrence of whitespace is ignored when comparing lines.
    IgnoreAllSpace,
    /// Changes in the amount of whitespace are ignored when comparing lines.
    IgnoreSpaceChange,
}
87
88pub fn diff_by_line<'input, T: AsRef<[u8]> + ?Sized + 'input>(
89    inputs: impl IntoIterator<Item = &'input T>,
90    options: &LineCompareMode,
91) -> ContentDiff<'input> {
92    // TODO: If we add --ignore-blank-lines, its tokenizer will have to attach
93    // blank lines to the preceding range. Maybe it can also be implemented as a
94    // post-process (similar to refine_changed_regions()) that expands unchanged
95    // regions across blank lines.
96    match options {
97        LineCompareMode::Exact => {
98            ContentDiff::for_tokenizer(inputs, find_line_ranges, CompareBytesExactly)
99        }
100        LineCompareMode::IgnoreAllSpace => {
101            ContentDiff::for_tokenizer(inputs, find_line_ranges, CompareBytesIgnoreAllWhitespace)
102        }
103        LineCompareMode::IgnoreSpaceChange => {
104            ContentDiff::for_tokenizer(inputs, find_line_ranges, CompareBytesIgnoreWhitespaceAmount)
105        }
106    }
107}
108
109/// Splits `[left, right]` hunk pairs into `[left_lines, right_lines]`.
110pub fn unzip_diff_hunks_to_lines<'content, I>(diff_hunks: I) -> Diff<Vec<DiffTokenVec<'content>>>
111where
112    I: IntoIterator,
113    I::Item: Borrow<DiffHunk<'content>>,
114{
115    let mut left_lines: Vec<DiffTokenVec<'content>> = vec![];
116    let mut right_lines: Vec<DiffTokenVec<'content>> = vec![];
117    let mut left_tokens: DiffTokenVec<'content> = vec![];
118    let mut right_tokens: DiffTokenVec<'content> = vec![];
119
120    for hunk in diff_hunks {
121        let hunk = hunk.borrow();
122        match hunk.kind {
123            DiffHunkKind::Matching => {
124                // TODO: add support for unmatched contexts
125                debug_assert!(hunk.contents.iter().all_equal());
126                for token in hunk.contents[0].split_inclusive(|b| *b == b'\n') {
127                    left_tokens.push((DiffTokenType::Matching, token));
128                    right_tokens.push((DiffTokenType::Matching, token));
129                    if token.ends_with(b"\n") {
130                        left_lines.push(mem::take(&mut left_tokens));
131                        right_lines.push(mem::take(&mut right_tokens));
132                    }
133                }
134            }
135            DiffHunkKind::Different => {
136                let [left, right] = hunk.contents[..]
137                    .try_into()
138                    .expect("hunk should have exactly two inputs");
139                for token in left.split_inclusive(|b| *b == b'\n') {
140                    left_tokens.push((DiffTokenType::Different, token));
141                    if token.ends_with(b"\n") {
142                        left_lines.push(mem::take(&mut left_tokens));
143                    }
144                }
145                for token in right.split_inclusive(|b| *b == b'\n') {
146                    right_tokens.push((DiffTokenType::Different, token));
147                    if token.ends_with(b"\n") {
148                        right_lines.push(mem::take(&mut right_tokens));
149                    }
150                }
151            }
152        }
153    }
154
155    if !left_tokens.is_empty() {
156        left_lines.push(left_tokens);
157    }
158    if !right_tokens.is_empty() {
159        right_lines.push(right_tokens);
160    }
161    Diff::new(left_lines, right_lines)
162}