Skip to main content

gix_diff/blob/unified_diff/
impls.rs

1use bstr::{BString, ByteSlice, ByteVec};
2use imara_diff::{Diff, InternedInput, Interner, Token};
3use std::fmt::Write;
4use std::{hash::Hash, ops::Range};
5
6use super::{ConsumeBinaryHunk, ConsumeBinaryHunkDelegate, ConsumeHunk, ContextSize, DiffLineKind, HunkHeader};
7
8/// A helper that renders a [`Diff`] as unified diff output.
9/// It can be used to create a textual diff in the format typically output by `git`
10/// or `gnu-diff` if the `-u` option is used.
11pub struct UnifiedDiff<'a, T, D>
12where
13    T: Hash + Eq + AsRef<[u8]>,
14    D: ConsumeHunk,
15{
16    diff: &'a Diff,
17    before: &'a [Token],
18    after: &'a [Token],
19    interner: &'a Interner<T>,
20
21    /// The 0-based start position in the 'before' tokens for the accumulated hunk for display in the header.
22    before_hunk_start: u32,
23    /// The size of the accumulated 'before' hunk in lines for display in the header.
24    before_hunk_len: u32,
25    /// The 0-based start position in the 'after' tokens for the accumulated hunk for display in the header.
26    after_hunk_start: u32,
27    /// The size of the accumulated 'after' hunk in lines.
28    after_hunk_len: u32,
29    // An index into `before` and the context line to print next,
30    // or `None` if this value was never computed to be the correct starting point for an accumulated hunk.
31    ctx_pos: Option<u32>,
32
33    /// Symmetrical context before and after the changed hunk.
34    ctx_size: u32,
35
36    buffer: Vec<(DiffLineKind, &'a [u8])>,
37
38    delegate: D,
39
40    err: Option<std::io::Error>,
41}
42
43impl<'a, T, D> UnifiedDiff<'a, T, D>
44where
45    T: Hash + Eq + AsRef<[u8]>,
46    D: ConsumeHunk,
47{
48    /// Create a new instance to create a unified diff from `diff` using the lines in `input`.
49    /// `context_size` is the amount of lines around each hunk which will be passed to `consume_hunk`.
50    ///
51    /// `consume_hunk` is called for each hunk with all the information required to create a unified diff.
52    pub fn new(diff: &'a Diff, input: &'a InternedInput<T>, consume_hunk: D, context_size: ContextSize) -> Self {
53        Self {
54            diff,
55            interner: &input.interner,
56            before: &input.before,
57            after: &input.after,
58
59            before_hunk_start: 0,
60            before_hunk_len: 0,
61            after_hunk_len: 0,
62            after_hunk_start: 0,
63            ctx_pos: None,
64
65            ctx_size: context_size.symmetrical,
66
67            buffer: Vec::with_capacity(8),
68            delegate: consume_hunk,
69
70            err: None,
71        }
72    }
73
74    fn print_tokens(&mut self, tokens: &[Token], line_type: DiffLineKind) {
75        for &token in tokens {
76            let content = self.interner[token].as_ref();
77            self.buffer.push((line_type, content));
78        }
79    }
80
81    fn flush_accumulated_hunk(&mut self) -> std::io::Result<()> {
82        if self.nothing_to_flush() {
83            return Ok(());
84        }
85
86        let ctx_pos = self.ctx_pos.expect("has been set if we started a hunk");
87        let end = (ctx_pos + self.ctx_size).min(self.before.len() as u32);
88        self.print_context_and_update_pos(ctx_pos..end, end);
89
90        let hunk_start = self.before_hunk_start + 1;
91        let hunk_end = self.after_hunk_start + 1;
92
93        let header = HunkHeader {
94            before_hunk_start: hunk_start,
95            before_hunk_len: self.before_hunk_len,
96            after_hunk_start: hunk_end,
97            after_hunk_len: self.after_hunk_len,
98        };
99
100        self.delegate.consume_hunk(header, &self.buffer)?;
101
102        self.reset_hunks();
103        Ok(())
104    }
105
106    fn print_context_and_update_pos(&mut self, print: Range<u32>, move_to: u32) {
107        self.print_tokens(
108            &self.before[print.start as usize..print.end as usize],
109            DiffLineKind::Context,
110        );
111
112        let len = print.end - print.start;
113        self.ctx_pos = Some(move_to);
114        self.before_hunk_len += len;
115        self.after_hunk_len += len;
116    }
117
118    fn reset_hunks(&mut self) {
119        self.buffer.clear();
120        self.before_hunk_len = 0;
121        self.after_hunk_len = 0;
122    }
123
124    fn nothing_to_flush(&self) -> bool {
125        self.before_hunk_len == 0 && self.after_hunk_len == 0
126    }
127
128    fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
129        if self.err.is_some() {
130            return;
131        }
132        let start_next_hunk = self
133            .ctx_pos
134            .is_some_and(|ctx_pos| before.start - ctx_pos > 2 * self.ctx_size);
135        if start_next_hunk {
136            if let Err(err) = self.flush_accumulated_hunk() {
137                self.err = Some(err);
138                return;
139            }
140            let ctx_pos = before.start - self.ctx_size;
141            self.ctx_pos = Some(ctx_pos);
142            self.before_hunk_start = ctx_pos;
143            self.after_hunk_start = after.start - self.ctx_size;
144        }
145        let ctx_pos = match self.ctx_pos {
146            None => {
147                // TODO: can this be made so the code above does the job?
148                let ctx_pos = before.start.saturating_sub(self.ctx_size);
149                self.before_hunk_start = ctx_pos;
150                self.after_hunk_start = after.start.saturating_sub(self.ctx_size);
151                ctx_pos
152            }
153            Some(pos) => pos,
154        };
155        self.print_context_and_update_pos(ctx_pos..before.start, before.end);
156        self.before_hunk_len += before.end - before.start;
157        self.after_hunk_len += after.end - after.start;
158
159        self.print_tokens(
160            &self.before[before.start as usize..before.end as usize],
161            DiffLineKind::Remove,
162        );
163        self.print_tokens(&self.after[after.start as usize..after.end as usize], DiffLineKind::Add);
164    }
165
166    /// Consume all hunks from `diff` and return the delegate's final output.
167    pub fn consume(mut self) -> std::io::Result<D::Out> {
168        for hunk in self.diff.hunks() {
169            self.process_change(hunk.before, hunk.after);
170        }
171        if let Err(err) = self.flush_accumulated_hunk() {
172            self.err = Some(err);
173        }
174        if let Some(err) = self.err {
175            return Err(err);
176        }
177        Ok(self.delegate.finish())
178    }
179}
180
181/// An implementation that fails if the input isn't UTF-8.
182impl<D> ConsumeHunk for ConsumeBinaryHunk<'_, D>
183where
184    D: ConsumeBinaryHunkDelegate,
185{
186    type Out = D;
187
188    fn consume_hunk(&mut self, header: HunkHeader, lines: &[(DiffLineKind, &[u8])]) -> std::io::Result<()> {
189        self.header_buf.clear();
190        self.header_buf
191            .write_fmt(format_args!("{header}{nl}", nl = self.newline))
192            .map_err(std::io::Error::other)?;
193
194        let buf = &mut self.hunk_buf;
195        buf.clear();
196        for &(line_type, content) in lines {
197            buf.push(line_type.to_prefix() as u8);
198            buf.extend_from_slice(content);
199
200            if !content.ends_with_str(self.newline) {
201                buf.push_str(self.newline);
202            }
203        }
204
205        self.delegate.consume_binary_hunk(header, &self.header_buf, buf)?;
206        Ok(())
207    }
208
209    fn finish(self) -> Self::Out {
210        self.delegate
211    }
212}
213
214/// An implementation that fails if the input isn't UTF-8.
215impl ConsumeBinaryHunkDelegate for String {
216    fn consume_binary_hunk(&mut self, _header: HunkHeader, header_str: &str, hunk: &[u8]) -> std::io::Result<()> {
217        self.push_str(header_str);
218        self.push_str(hunk.to_str().map_err(std::io::Error::other)?);
219        Ok(())
220    }
221}
222
223/// An implementation that writes hunks into a byte buffer.
224impl ConsumeBinaryHunkDelegate for Vec<u8> {
225    fn consume_binary_hunk(&mut self, _header: HunkHeader, header_str: &str, hunk: &[u8]) -> std::io::Result<()> {
226        self.push_str(header_str);
227        self.extend_from_slice(hunk);
228        Ok(())
229    }
230}
231
232/// An implementation that writes hunks into a hunman-readable byte buffer.
233impl ConsumeBinaryHunkDelegate for BString {
234    fn consume_binary_hunk(&mut self, _header: HunkHeader, header_str: &str, hunk: &[u8]) -> std::io::Result<()> {
235        self.push_str(header_str);
236        self.extend_from_slice(hunk);
237        Ok(())
238    }
239}
240
241impl<'a, D> ConsumeBinaryHunk<'a, D>
242where
243    D: ConsumeBinaryHunkDelegate,
244{
245    /// Create a new instance that writes stringified hunks to `delegate`, which uses `newline` to separate header and hunk,
246    /// as well as hunk lines that don't naturally end in a newline.
247    pub fn new(delegate: D, newline: &'a str) -> ConsumeBinaryHunk<'a, D> {
248        ConsumeBinaryHunk {
249            newline,
250            delegate,
251            header_buf: String::new(),
252            hunk_buf: Vec::with_capacity(128),
253        }
254    }
255}
256
257impl DiffLineKind {
258    /// Returns a one-character representation for use in unified diffs.
259    pub const fn to_prefix(self) -> char {
260        match self {
261            DiffLineKind::Context => ' ',
262            DiffLineKind::Add => '+',
263            DiffLineKind::Remove => '-',
264        }
265    }
266}
267
268impl std::fmt::Display for HunkHeader {
269    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
270        write!(
271            f,
272            "@@ -{},{} +{},{} @@",
273            self.before_hunk_start, self.before_hunk_len, self.after_hunk_start, self.after_hunk_len
274        )
275    }
276}