1use crate::looks_binary;
6use serde::{Deserialize, Serialize};
7use similar::{ChangeTag, TextDiff};
8
/// Why a pair of blobs was classified as binary instead of being diffed as text.
///
/// NOTE(review): unlike the other enums in this file there is no
/// `#[serde(rename_all = "snake_case")]` here, so the variants serialize under
/// their Rust names (`NullByte`, `NotUtf8`) — confirm this is intentional.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum BinaryReason {
    /// Set by `diff_blobs` when `looks_binary` flags either side.
    NullByte,
    /// Set by `diff_blobs` when either side fails UTF-8 validation.
    NotUtf8,
}
17
/// Classification of a single line inside a hunk.
///
/// Serialized in snake_case (`delete`, `insert`, `equal`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DiffLineKind {
    /// Produced for `ChangeTag::Delete`; counted towards `a_count` and `deletions`.
    Delete,
    /// Produced for `ChangeTag::Insert`; counted towards `b_count` and `additions`.
    Insert,
    /// Produced for `ChangeTag::Equal`; counted towards both `a_count` and `b_count`.
    Equal,
}
30
/// One line of a hunk together with its position on each side of the diff.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DiffLine {
    /// Whether the line was deleted, inserted, or is unchanged.
    pub kind: DiffLineKind,
    /// 1-based line number on the `a` side (old index + 1), when the change
    /// has an old index. Omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub a_line: Option<u32>,
    /// 1-based line number on the `b` side (new index + 1), when the change
    /// has a new index. Omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub b_line: Option<u32>,
    /// Text of the line with its trailing line terminator stripped.
    pub content: String,
}
45
/// A group of neighbouring changes plus their surrounding context lines.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DiffHunk {
    /// `a_line` of the first line in the hunk that has one, or `0` if no line does.
    pub a_start: u32,
    /// Number of `Delete` and `Equal` lines in the hunk.
    pub a_count: u32,
    /// `b_line` of the first line in the hunk that has one, or `0` if no line does.
    pub b_start: u32,
    /// Number of `Insert` and `Equal` lines in the hunk.
    pub b_count: u32,
    /// The hunk's lines, in the order the diff produced them.
    pub lines: Vec<DiffLine>,
}
60
/// Overall outcome of comparing the two sides of a file.
///
/// Variant names are serialized in snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileDiffStatus {
    /// Both sides are absent, or both decode to identical text.
    Unchanged,
    /// Only the `b` side is present.
    Added,
    /// Only the `a` side is present.
    Deleted,
    /// Both sides are present and their text differs.
    Modified,
    /// At least one side could not be treated as text; no hunks are produced.
    Binary {
        /// Which check rejected the content.
        reason: BinaryReason,
    },
}
79
/// Result of diffing one file: its status, hunks, and line totals.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FileDiff {
    /// Path supplied by the caller for the `a` side, if any.
    pub a_path: Option<String>,
    /// Path supplied by the caller for the `b` side, if any.
    pub b_path: Option<String>,
    /// Overall classification of the change.
    pub status: FileDiffStatus,
    /// Hunks in diff order; empty for unchanged and binary results.
    pub hunks: Vec<DiffHunk>,
    /// Total number of `Insert` lines across all hunks.
    pub additions: u32,
    /// Total number of `Delete` lines across all hunks.
    pub deletions: u32,
}
99
100pub fn diff_blobs(
104 a: Option<&[u8]>,
105 b: Option<&[u8]>,
106 a_path: Option<&str>,
107 b_path: Option<&str>,
108 context_lines: usize,
109) -> FileDiff {
110 let status = match (a, b) {
111 (None, None) => FileDiffStatus::Unchanged,
112 (None, Some(_)) => FileDiffStatus::Added,
113 (Some(_), None) => FileDiffStatus::Deleted,
114 (Some(_), Some(_)) => FileDiffStatus::Modified, };
116
117 let mut out = FileDiff {
118 a_path: a_path.map(str::to_string),
119 b_path: b_path.map(str::to_string),
120 status,
121 hunks: Vec::new(),
122 additions: 0,
123 deletions: 0,
124 };
125
126 let any_binary = a.map_or(false, looks_binary) || b.map_or(false, looks_binary);
129 if any_binary {
130 out.status = FileDiffStatus::Binary {
131 reason: BinaryReason::NullByte,
132 };
133 return out;
134 }
135
136 let a_text = match a {
137 Some(b) => match std::str::from_utf8(b) {
138 Ok(s) => Some(s.to_string()),
139 Err(_) => {
140 out.status = FileDiffStatus::Binary {
141 reason: BinaryReason::NotUtf8,
142 };
143 return out;
144 }
145 },
146 None => None,
147 };
148 let b_text = match b {
149 Some(b) => match std::str::from_utf8(b) {
150 Ok(s) => Some(s.to_string()),
151 Err(_) => {
152 out.status = FileDiffStatus::Binary {
153 reason: BinaryReason::NotUtf8,
154 };
155 return out;
156 }
157 },
158 None => None,
159 };
160
161 if a_text.as_deref() == b_text.as_deref() {
162 out.status = FileDiffStatus::Unchanged;
163 return out;
164 }
165
166 let a_str = a_text.as_deref().unwrap_or("");
167 let b_str = b_text.as_deref().unwrap_or("");
168
169 let diff = TextDiff::from_lines(a_str, b_str);
170
171 for group in diff.grouped_ops(context_lines) {
172 let mut hunk_lines: Vec<DiffLine> = Vec::new();
173 let mut hunk_a_start = u32::MAX;
174 let mut hunk_b_start = u32::MAX;
175 let mut hunk_a_count: u32 = 0;
176 let mut hunk_b_count: u32 = 0;
177
178 for op in group {
179 for change in diff.iter_changes(&op) {
180 let content = strip_trailing_newline(change.value());
181 let a_line = change.old_index().map(|i| (i as u32) + 1);
182 let b_line = change.new_index().map(|i| (i as u32) + 1);
183
184 if let Some(n) = a_line {
185 if hunk_a_start == u32::MAX {
186 hunk_a_start = n;
187 }
188 }
189 if let Some(n) = b_line {
190 if hunk_b_start == u32::MAX {
191 hunk_b_start = n;
192 }
193 }
194
195 let kind = match change.tag() {
196 ChangeTag::Delete => {
197 hunk_a_count += 1;
198 out.deletions += 1;
199 DiffLineKind::Delete
200 }
201 ChangeTag::Insert => {
202 hunk_b_count += 1;
203 out.additions += 1;
204 DiffLineKind::Insert
205 }
206 ChangeTag::Equal => {
207 hunk_a_count += 1;
208 hunk_b_count += 1;
209 DiffLineKind::Equal
210 }
211 };
212
213 hunk_lines.push(DiffLine {
214 kind,
215 a_line,
216 b_line,
217 content,
218 });
219 }
220 }
221
222 let a_start = if hunk_a_start == u32::MAX {
224 0
225 } else {
226 hunk_a_start
227 };
228 let b_start = if hunk_b_start == u32::MAX {
229 0
230 } else {
231 hunk_b_start
232 };
233
234 out.hunks.push(DiffHunk {
235 a_start,
236 a_count: hunk_a_count,
237 b_start,
238 b_count: hunk_b_count,
239 lines: hunk_lines,
240 });
241 }
242
243 if out.hunks.is_empty() && a_text.as_deref() == b_text.as_deref() {
244 out.status = FileDiffStatus::Unchanged;
245 }
246 out
247}
248
249pub fn file_diff(
253 a: Option<&str>,
254 b: Option<&str>,
255 a_path: Option<&str>,
256 b_path: Option<&str>,
257 context_lines: usize,
258) -> FileDiff {
259 diff_blobs(
260 a.map(str::as_bytes),
261 b.map(str::as_bytes),
262 a_path,
263 b_path,
264 context_lines,
265 )
266}
267
/// Returns `s` without its single trailing line terminator, if it has one.
///
/// Recognizes `"\r\n"`, `"\n"` and a lone `"\r"`. At most one terminator is
/// removed, so `"a\n\n"` becomes `"a\n"`. Input without a terminator is
/// returned unchanged.
fn strip_trailing_newline(s: &str) -> String {
    // Try the two-byte terminator first so "\r\n" is not reduced to "\r".
    s.strip_suffix("\r\n")
        .or_else(|| s.strip_suffix('\n'))
        // The line tokenizer used for diffing is expected to end a line at a
        // bare carriage return as well, so strip that form too.
        .or_else(|| s.strip_suffix('\r'))
        .unwrap_or(s)
        .to_string()
}
274
#[cfg(test)]
mod tests {
    use super::*;

    /// Identical inputs report `Unchanged` with no hunks and zero counters.
    #[test]
    fn unchanged_files_produce_no_hunks() {
        let text = "a\nb\nc\n";
        let result = file_diff(Some(text), Some(text), Some("x.txt"), Some("x.txt"), 3);

        assert_eq!(result.status, FileDiffStatus::Unchanged);
        assert!(result.hunks.is_empty());
        assert_eq!(result.additions, 0);
        assert_eq!(result.deletions, 0);
    }

    /// A file that exists only on the `b` side consists purely of inserts.
    #[test]
    fn added_file_yields_only_inserts() {
        let result = file_diff(None, Some("hello\nworld\n"), None, Some("greet.txt"), 3);

        assert_eq!(result.status, FileDiffStatus::Added);
        assert_eq!(result.additions, 2);
        assert_eq!(result.deletions, 0);

        let only_inserts = result
            .hunks
            .iter()
            .flat_map(|hunk| &hunk.lines)
            .all(|line| line.kind == DiffLineKind::Insert);
        assert!(only_inserts);
    }

    /// A file that exists only on the `a` side counts every line as deleted.
    #[test]
    fn deleted_file_yields_only_deletes() {
        let result = file_diff(Some("a\nb\n"), None, Some("gone.txt"), None, 3);

        assert_eq!(result.status, FileDiffStatus::Deleted);
        assert_eq!(result.additions, 0);
        assert_eq!(result.deletions, 2);
    }

    /// A one-line edit with one line of context yields a single four-line hunk.
    #[test]
    fn modified_file_groups_hunks_with_context() {
        let old = "alpha\nbeta\ngamma\ndelta\nepsilon\n";
        let new = "alpha\nBETA\ngamma\ndelta\nepsilon\n";
        let result = file_diff(Some(old), Some(new), Some("greek.txt"), Some("greek.txt"), 1);

        assert_eq!(result.status, FileDiffStatus::Modified);
        assert_eq!(result.additions, 1);
        assert_eq!(result.deletions, 1);
        assert_eq!(result.hunks.len(), 1);

        let expected = vec![
            DiffLineKind::Equal,
            DiffLineKind::Delete,
            DiffLineKind::Insert,
            DiffLineKind::Equal,
        ];
        let actual: Vec<DiffLineKind> = result.hunks[0]
            .lines
            .iter()
            .map(|line| line.kind)
            .collect();
        assert_eq!(actual, expected);
    }

    /// A NUL byte on either side makes the whole diff binary.
    #[test]
    fn binary_blobs_surface_as_binary() {
        let old: &[u8] = b"hello\n";
        // Same bytes as "hello" followed by a pushed 0.
        let new: &[u8] = b"hello\0";
        let result = diff_blobs(Some(old), Some(new), Some("x"), Some("x"), 3);

        assert_eq!(
            result.status,
            FileDiffStatus::Binary {
                reason: BinaryReason::NullByte,
            }
        );
    }

    /// Serializing and deserializing a diff gives back an equal value.
    #[test]
    fn round_trips_through_json() {
        let original = file_diff(Some("a\n"), Some("b\n"), Some("p"), Some("p"), 3);

        let json = serde_json::to_string(&original).unwrap();
        let restored: FileDiff = serde_json::from_str(&json).unwrap();

        assert_eq!(original, restored);
    }
}