gix_diff/blob/unified_diff/
impls.rs

1use bstr::{BString, ByteSlice, ByteVec};
2use imara_diff::{intern, Sink};
3use intern::{InternedInput, Interner, Token};
4use std::fmt::Write;
5use std::{hash::Hash, ops::Range};
6
7use super::{ConsumeBinaryHunk, ConsumeBinaryHunkDelegate, ConsumeHunk, ContextSize, DiffLineKind, HunkHeader};
8
9/// A [`Sink`] that creates a unified diff. It can be used to create a textual diff in the
10/// format typically output by `git` or `gnu-diff` if the `-u` option is used.
11pub struct UnifiedDiff<'a, T, D>
12where
13    T: Hash + Eq + AsRef<[u8]>,
14    D: ConsumeHunk,
15{
16    before: &'a [Token],
17    after: &'a [Token],
18    interner: &'a Interner<T>,
19
20    /// The 0-based start position in the 'before' tokens for the accumulated hunk for display in the header.
21    before_hunk_start: u32,
22    /// The size of the accumulated 'before' hunk in lines for display in the header.
23    before_hunk_len: u32,
24    /// The 0-based start position in the 'after' tokens for the accumulated hunk for display in the header.
25    after_hunk_start: u32,
26    /// The size of the accumulated 'after' hunk in lines.
27    after_hunk_len: u32,
28    // An index into `before` and the context line to print next,
29    // or `None` if this value was never computed to be the correct starting point for an accumulated hunk.
30    ctx_pos: Option<u32>,
31
32    /// Symmetrical context before and after the changed hunk.
33    ctx_size: u32,
34
35    buffer: Vec<(DiffLineKind, &'a [u8])>,
36
37    delegate: D,
38
39    err: Option<std::io::Error>,
40}
41
42impl<'a, T, D> UnifiedDiff<'a, T, D>
43where
44    T: Hash + Eq + AsRef<[u8]>,
45    D: ConsumeHunk,
46{
47    /// Create a new instance to create a unified diff using the lines in `input`,
48    /// which also must be used when running the diff algorithm.
49    /// `context_size` is the amount of lines around each hunk which will be passed
50    /// to `consume_hunk`.
51    ///
52    /// `consume_hunk` is called for each hunk with all the information required to create a
53    /// unified diff.
54    pub fn new(input: &'a InternedInput<T>, consume_hunk: D, context_size: ContextSize) -> Self {
55        Self {
56            interner: &input.interner,
57            before: &input.before,
58            after: &input.after,
59
60            before_hunk_start: 0,
61            before_hunk_len: 0,
62            after_hunk_len: 0,
63            after_hunk_start: 0,
64            ctx_pos: None,
65
66            ctx_size: context_size.symmetrical,
67
68            buffer: Vec::with_capacity(8),
69            delegate: consume_hunk,
70
71            err: None,
72        }
73    }
74
75    fn print_tokens(&mut self, tokens: &[Token], line_type: DiffLineKind) {
76        for &token in tokens {
77            let content = self.interner[token].as_ref();
78            self.buffer.push((line_type, content));
79        }
80    }
81
82    fn flush_accumulated_hunk(&mut self) -> std::io::Result<()> {
83        if self.nothing_to_flush() {
84            return Ok(());
85        }
86
87        let ctx_pos = self.ctx_pos.expect("has been set if we started a hunk");
88        let end = (ctx_pos + self.ctx_size).min(self.before.len() as u32);
89        self.print_context_and_update_pos(ctx_pos..end, end);
90
91        let hunk_start = self.before_hunk_start + 1;
92        let hunk_end = self.after_hunk_start + 1;
93
94        let header = HunkHeader {
95            before_hunk_start: hunk_start,
96            before_hunk_len: self.before_hunk_len,
97            after_hunk_start: hunk_end,
98            after_hunk_len: self.after_hunk_len,
99        };
100
101        self.delegate.consume_hunk(header, &self.buffer)?;
102
103        self.reset_hunks();
104        Ok(())
105    }
106
107    fn print_context_and_update_pos(&mut self, print: Range<u32>, move_to: u32) {
108        self.print_tokens(
109            &self.before[print.start as usize..print.end as usize],
110            DiffLineKind::Context,
111        );
112
113        let len = print.end - print.start;
114        self.ctx_pos = Some(move_to);
115        self.before_hunk_len += len;
116        self.after_hunk_len += len;
117    }
118
119    fn reset_hunks(&mut self) {
120        self.buffer.clear();
121        self.before_hunk_len = 0;
122        self.after_hunk_len = 0;
123    }
124
125    fn nothing_to_flush(&self) -> bool {
126        self.before_hunk_len == 0 && self.after_hunk_len == 0
127    }
128}
129
130impl<T, D> Sink for UnifiedDiff<'_, T, D>
131where
132    T: Hash + Eq + AsRef<[u8]>,
133    D: ConsumeHunk,
134{
135    type Out = std::io::Result<D::Out>;
136
137    fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
138        if self.err.is_some() {
139            return;
140        }
141        let start_next_hunk = self
142            .ctx_pos
143            .is_some_and(|ctx_pos| before.start - ctx_pos > 2 * self.ctx_size);
144        if start_next_hunk {
145            if let Err(err) = self.flush_accumulated_hunk() {
146                self.err = Some(err);
147                return;
148            }
149            let ctx_pos = before.start - self.ctx_size;
150            self.ctx_pos = Some(ctx_pos);
151            self.before_hunk_start = ctx_pos;
152            self.after_hunk_start = after.start - self.ctx_size;
153        }
154        let ctx_pos = match self.ctx_pos {
155            None => {
156                // TODO: can this be made so the code above does the job?
157                let ctx_pos = before.start.saturating_sub(self.ctx_size);
158                self.before_hunk_start = ctx_pos;
159                self.after_hunk_start = after.start.saturating_sub(self.ctx_size);
160                ctx_pos
161            }
162            Some(pos) => pos,
163        };
164        self.print_context_and_update_pos(ctx_pos..before.start, before.end);
165        self.before_hunk_len += before.end - before.start;
166        self.after_hunk_len += after.end - after.start;
167
168        self.print_tokens(
169            &self.before[before.start as usize..before.end as usize],
170            DiffLineKind::Remove,
171        );
172        self.print_tokens(&self.after[after.start as usize..after.end as usize], DiffLineKind::Add);
173    }
174
175    fn finish(mut self) -> Self::Out {
176        if let Err(err) = self.flush_accumulated_hunk() {
177            self.err = Some(err);
178        }
179        if let Some(err) = self.err {
180            return Err(err);
181        }
182        Ok(self.delegate.finish())
183    }
184}
185
186/// An implementation that fails if the input isn't UTF-8.
187impl<D> ConsumeHunk for ConsumeBinaryHunk<'_, D>
188where
189    D: ConsumeBinaryHunkDelegate,
190{
191    type Out = D;
192
193    fn consume_hunk(&mut self, header: HunkHeader, lines: &[(DiffLineKind, &[u8])]) -> std::io::Result<()> {
194        self.header_buf.clear();
195        self.header_buf
196            .write_fmt(format_args!("{header}{nl}", nl = self.newline))
197            .map_err(std::io::Error::other)?;
198
199        let buf = &mut self.hunk_buf;
200        buf.clear();
201        for &(line_type, content) in lines {
202            buf.push(line_type.to_prefix() as u8);
203            buf.extend_from_slice(content);
204
205            if !content.ends_with_str(self.newline) {
206                buf.push_str(self.newline);
207            }
208        }
209
210        self.delegate.consume_binary_hunk(header, &self.header_buf, buf)?;
211        Ok(())
212    }
213
214    fn finish(self) -> Self::Out {
215        self.delegate
216    }
217}
218
219/// An implementation that fails if the input isn't UTF-8.
220impl ConsumeBinaryHunkDelegate for String {
221    fn consume_binary_hunk(&mut self, _header: HunkHeader, header_str: &str, hunk: &[u8]) -> std::io::Result<()> {
222        self.push_str(header_str);
223        self.push_str(hunk.to_str().map_err(std::io::Error::other)?);
224        Ok(())
225    }
226}
227
228/// An implementation that writes hunks into a byte buffer.
229impl ConsumeBinaryHunkDelegate for Vec<u8> {
230    fn consume_binary_hunk(&mut self, _header: HunkHeader, header_str: &str, hunk: &[u8]) -> std::io::Result<()> {
231        self.push_str(header_str);
232        self.extend_from_slice(hunk);
233        Ok(())
234    }
235}
236
237/// An implementation that writes hunks into a hunman-readable byte buffer.
238impl ConsumeBinaryHunkDelegate for BString {
239    fn consume_binary_hunk(&mut self, _header: HunkHeader, header_str: &str, hunk: &[u8]) -> std::io::Result<()> {
240        self.push_str(header_str);
241        self.extend_from_slice(hunk);
242        Ok(())
243    }
244}
245
246impl<'a, D> ConsumeBinaryHunk<'a, D>
247where
248    D: ConsumeBinaryHunkDelegate,
249{
250    /// Create a new instance that writes stringified hunks to `delegate`, which uses `newline` to separate header and hunk,
251    /// as well as hunk lines that don't naturally end in a newline.
252    pub fn new(delegate: D, newline: &'a str) -> ConsumeBinaryHunk<'a, D> {
253        ConsumeBinaryHunk {
254            newline,
255            delegate,
256            header_buf: String::new(),
257            hunk_buf: Vec::with_capacity(128),
258        }
259    }
260}
261
262impl DiffLineKind {
263    const fn to_prefix(self) -> char {
264        match self {
265            DiffLineKind::Context => ' ',
266            DiffLineKind::Add => '+',
267            DiffLineKind::Remove => '-',
268        }
269    }
270}
271
272impl std::fmt::Display for HunkHeader {
273    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
274        write!(
275            f,
276            "@@ -{},{} +{},{} @@",
277            self.before_hunk_start, self.before_hunk_len, self.after_hunk_start, self.after_hunk_len
278        )
279    }
280}