/// A specification of how many unchanged lines to show around each change.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ContextSize {
    /// The number of context lines shown both before and after a change.
    symmetrical: u32,
}

impl Default for ContextSize {
    /// Three lines of context on either side of a change.
    fn default() -> Self {
        Self::symmetrical(3)
    }
}

/// Instantiation
impl ContextSize {
    /// Create a context size that uses `n` lines before and `n` lines after each change.
    pub fn symmetrical(n: u32) -> Self {
        Self { symmetrical: n }
    }
}
28
/// Specifies where the unified diff places the given newline separator.
#[derive(Clone, Copy, Debug)]
pub enum NewlineSeparator<'a> {
    /// Append the separator, like `\n`, after each hunk header and unconditionally after
    /// each printed line.
    ///
    /// Suitable if the diffed tokens do not include their line terminator.
    AfterHeaderAndLine(&'a str),
    /// Append the separator, like `\n`, after each hunk header, and after a printed line
    /// only if that line does not already end with it.
    ///
    /// Suitable if the diffed tokens do include their line terminator.
    AfterHeaderAndWhenNeeded(&'a str),
}
41
/// A receiver of finished hunks, as produced by the unified diff sink in this file.
pub trait ConsumeHunk {
    /// The value produced by [`finish()`](Self::finish) once all hunks were consumed.
    type Out;

    /// Consume a single `hunk`, which is the body of a unified diff hunk that `header`
    /// would precede.
    ///
    /// The numbers are the same ones that were used to format `header`:
    ///
    /// * `before_hunk_start` - the 1-based first line of the hunk in the old version.
    /// * `before_hunk_len` - the number of lines of the hunk in the old version.
    /// * `after_hunk_start` - the 1-based first line of the hunk in the new version.
    /// * `after_hunk_len` - the number of lines of the hunk in the new version.
    ///
    /// # Errors
    ///
    /// Any error returned here is recorded by the unified diff sink and returned
    /// from its own `finish()`; further changes are ignored after the first error.
    fn consume_hunk(
        &mut self,
        before_hunk_start: u32,
        before_hunk_len: u32,
        after_hunk_start: u32,
        after_hunk_len: u32,
        header: &str,
        hunk: &[u8],
    ) -> std::io::Result<()>;

    /// Called after the last hunk was consumed to turn this instance into its output.
    fn finish(self) -> Self::Out;
}
70
71pub(super) mod _impl {
72 use std::{hash::Hash, io::ErrorKind, ops::Range};
73
74 use bstr::{ByteSlice, ByteVec};
75 use imara_diff::{intern, Sink};
76 use intern::{InternedInput, Interner, Token};
77
78 use super::{ConsumeHunk, ContextSize, NewlineSeparator};
79
/// Prefix of a line that is unchanged and only printed as context.
const CONTEXT: char = ' ';
/// Prefix of a line that is only present in the new version.
const ADDITION: char = '+';
/// Prefix of a line that is only present in the old version.
const REMOVAL: char = '-';
83
84 pub struct UnifiedDiff<'a, T, D>
87 where
88 T: Hash + Eq + AsRef<[u8]>,
89 D: ConsumeHunk,
90 {
91 before: &'a [Token],
92 after: &'a [Token],
93 interner: &'a Interner<T>,
94
95 before_hunk_start: u32,
97 before_hunk_len: u32,
99 after_hunk_start: u32,
101 after_hunk_len: u32,
103 ctx_pos: Option<u32>,
106
107 ctx_size: u32,
109 newline: NewlineSeparator<'a>,
110
111 buffer: Vec<u8>,
112 header_buf: String,
113 delegate: D,
114
115 err: Option<std::io::Error>,
116 }
117
118 impl<'a, T, D> UnifiedDiff<'a, T, D>
119 where
120 T: Hash + Eq + AsRef<[u8]>,
121 D: ConsumeHunk,
122 {
123 pub fn new(
130 input: &'a InternedInput<T>,
131 consume_hunk: D,
132 newline_separator: NewlineSeparator<'a>,
133 context_size: ContextSize,
134 ) -> Self {
135 Self {
136 interner: &input.interner,
137 before: &input.before,
138 after: &input.after,
139
140 before_hunk_start: 0,
141 before_hunk_len: 0,
142 after_hunk_len: 0,
143 after_hunk_start: 0,
144 ctx_pos: None,
145
146 ctx_size: context_size.symmetrical,
147 newline: newline_separator,
148
149 buffer: Vec::with_capacity(8),
150 header_buf: String::new(),
151 delegate: consume_hunk,
152
153 err: None,
154 }
155 }
156
157 fn print_tokens(&mut self, tokens: &[Token], prefix: char) {
158 for &token in tokens {
159 self.buffer.push_char(prefix);
160 let line = &self.interner[token];
161 self.buffer.push_str(line);
162 match self.newline {
163 NewlineSeparator::AfterHeaderAndLine(nl) => {
164 self.buffer.push_str(nl);
165 }
166 NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => {
167 if !line.as_ref().ends_with_str(nl) {
168 self.buffer.push_str(nl);
169 }
170 }
171 }
172 }
173 }
174
175 fn flush_accumulated_hunk(&mut self) -> std::io::Result<()> {
176 if self.nothing_to_flush() {
177 return Ok(());
178 }
179
180 let ctx_pos = self.ctx_pos.expect("has been set if we started a hunk");
181 let end = (ctx_pos + self.ctx_size).min(self.before.len() as u32);
182 self.print_context_and_update_pos(ctx_pos..end, end);
183
184 let hunk_start = self.before_hunk_start + 1;
185 let hunk_end = self.after_hunk_start + 1;
186 self.header_buf.clear();
187 std::fmt::Write::write_fmt(
188 &mut self.header_buf,
189 format_args!(
190 "@@ -{},{} +{},{} @@{nl}",
191 hunk_start,
192 self.before_hunk_len,
193 hunk_end,
194 self.after_hunk_len,
195 nl = match self.newline {
196 NewlineSeparator::AfterHeaderAndLine(nl) | NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => {
197 nl
198 }
199 }
200 ),
201 )
202 .map_err(|err| std::io::Error::new(ErrorKind::Other, err))?;
203 self.delegate.consume_hunk(
204 hunk_start,
205 self.before_hunk_len,
206 hunk_end,
207 self.after_hunk_len,
208 &self.header_buf,
209 &self.buffer,
210 )?;
211
212 self.reset_hunks();
213 Ok(())
214 }
215
216 fn print_context_and_update_pos(&mut self, print: Range<u32>, move_to: u32) {
217 self.print_tokens(&self.before[print.start as usize..print.end as usize], CONTEXT);
218 let len = print.end - print.start;
219 self.ctx_pos = Some(move_to);
220 self.before_hunk_len += len;
221 self.after_hunk_len += len;
222 }
223
224 fn reset_hunks(&mut self) {
225 self.buffer.clear();
226 self.before_hunk_len = 0;
227 self.after_hunk_len = 0;
228 }
229
230 fn nothing_to_flush(&self) -> bool {
231 self.before_hunk_len == 0 && self.after_hunk_len == 0
232 }
233 }
234
235 impl<T, D> Sink for UnifiedDiff<'_, T, D>
236 where
237 T: Hash + Eq + AsRef<[u8]>,
238 D: ConsumeHunk,
239 {
240 type Out = std::io::Result<D::Out>;
241
242 fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
243 if self.err.is_some() {
244 return;
245 }
246 let start_next_hunk = self
247 .ctx_pos
248 .is_some_and(|ctx_pos| before.start - ctx_pos > 2 * self.ctx_size);
249 if start_next_hunk {
250 if let Err(err) = self.flush_accumulated_hunk() {
251 self.err = Some(err);
252 return;
253 }
254 let ctx_pos = before.start - self.ctx_size;
255 self.ctx_pos = Some(ctx_pos);
256 self.before_hunk_start = ctx_pos;
257 self.after_hunk_start = after.start - self.ctx_size;
258 }
259 let ctx_pos = match self.ctx_pos {
260 None => {
261 let ctx_pos = before.start.saturating_sub(self.ctx_size);
263 self.before_hunk_start = ctx_pos;
264 self.after_hunk_start = after.start.saturating_sub(self.ctx_size);
265 ctx_pos
266 }
267 Some(pos) => pos,
268 };
269 self.print_context_and_update_pos(ctx_pos..before.start, before.end);
270 self.before_hunk_len += before.end - before.start;
271 self.after_hunk_len += after.end - after.start;
272
273 self.print_tokens(&self.before[before.start as usize..before.end as usize], REMOVAL);
274 self.print_tokens(&self.after[after.start as usize..after.end as usize], ADDITION);
275 }
276
277 fn finish(mut self) -> Self::Out {
278 if let Err(err) = self.flush_accumulated_hunk() {
279 self.err = Some(err);
280 }
281 if let Some(err) = self.err {
282 return Err(err);
283 }
284 Ok(self.delegate.finish())
285 }
286 }
287
288 impl ConsumeHunk for String {
290 type Out = Self;
291
292 fn consume_hunk(&mut self, _: u32, _: u32, _: u32, _: u32, header: &str, hunk: &[u8]) -> std::io::Result<()> {
293 self.push_str(header);
294 self.push_str(
295 hunk.to_str()
296 .map_err(|err| std::io::Error::new(ErrorKind::Other, err))?,
297 );
298 Ok(())
299 }
300
301 fn finish(self) -> Self::Out {
302 self
303 }
304 }
305
306 impl ConsumeHunk for Vec<u8> {
308 type Out = Self;
309
310 fn consume_hunk(&mut self, _: u32, _: u32, _: u32, _: u32, header: &str, hunk: &[u8]) -> std::io::Result<()> {
311 self.push_str(header);
312 self.push_str(hunk);
313 Ok(())
314 }
315
316 fn finish(self) -> Self::Out {
317 self
318 }
319 }
320}