/// Aggregate change counters for a diff.
///
/// A plain `Copy` record of three counters; [`Default`] yields all zeros.
/// Nothing in this file computes the counters; they are stored exactly as
/// supplied by the caller.
/// With the `serde` feature enabled the type also derives `serde::Serialize`.
#[non_exhaustive]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct DiffStat {
    /// Count of changed files, as supplied by the caller.
    pub files_changed: usize,
    /// Count of insertions, as supplied by the caller.
    pub insertions: usize,
    /// Count of deletions, as supplied by the caller.
    pub deletions: usize,
}
22
23impl DiffStat {
24 pub fn new(files_changed: usize, insertions: usize, deletions: usize) -> Self {
28 Self {
29 files_changed,
30 insertions,
31 deletions,
32 }
33 }
34}
35
/// How a file was affected by a diff section.
///
/// With the `serde` feature enabled the type also derives `serde::Serialize`.
#[non_exhaustive]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ChangeKind {
    /// The section header contains a `new file` line.
    Added,
    /// No add/delete/rename marker was found; this is the parser's default.
    Modified,
    /// The section header contains a `deleted file` line.
    Deleted,
    /// The section header contains a `rename to` line.
    Renamed,
}
50
/// One body line of a hunk, with its leading marker character removed.
///
/// With the `serde` feature enabled the type also derives `serde::Serialize`.
#[non_exhaustive]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DiffLine {
    /// A line that started with a space (unchanged context).
    Context(String),
    /// A line that started with `+`.
    Added(String),
    /// A line that started with `-`.
    Removed(String),
}
66
67#[derive(Debug, Clone, PartialEq, Eq)]
69#[cfg_attr(feature = "serde", derive(serde::Serialize))]
70#[non_exhaustive]
71pub struct Hunk {
72 pub old_start: usize,
74 pub old_lines: usize,
76 pub new_start: usize,
78 pub new_lines: usize,
80 pub section: String,
82 pub lines: Vec<DiffLine>,
84}
85
86#[derive(Debug, Clone, PartialEq, Eq)]
89#[cfg_attr(feature = "serde", derive(serde::Serialize))]
90#[non_exhaustive]
91pub struct FileDiff {
92 pub change: ChangeKind,
94 pub path: String,
96 pub old_path: Option<String>,
98 pub hunks: Vec<Hunk>,
100 pub raw: String,
103}
104
105pub fn parse_diff(diff: &str) -> Vec<FileDiff> {
113 diff_sections(diff).filter_map(parse_section).collect()
114}
115
/// Splits `full` into per-file sections.
///
/// Each yielded slice starts at a line beginning with `diff --git ` and runs
/// up to (not including) the next such line, or to the end of the input.
/// Anything before the first header line is not yielded.
fn diff_sections(full: &str) -> impl Iterator<Item = &str> {
    // Byte offsets of every header line. `split_inclusive` keeps the line
    // terminator, so summing line lengths gives exact offsets.
    let mut starts: Vec<usize> = Vec::new();
    let mut offset = 0usize;
    for line in full.split_inclusive('\n') {
        if line.starts_with("diff --git ") {
            starts.push(offset);
        }
        offset += line.len();
    }

    // A section ends where the next one begins; the last one ends at EOF.
    let mut sections: Vec<&str> = Vec::with_capacity(starts.len());
    for (n, &begin) in starts.iter().enumerate() {
        let end = starts.get(n + 1).copied().unwrap_or(full.len());
        sections.push(&full[begin..end]);
    }
    sections.into_iter()
}
139
140fn parse_section(section: &str) -> Option<FileDiff> {
143 let mut kind = ChangeKind::Modified;
144 let mut new_path = None;
145 let mut minus_path = None;
146 let mut rename_to = None;
147 let mut rename_from = None;
148 let mut hunks: Vec<Hunk> = Vec::new();
149 let mut current: Option<Hunk> = None;
150
151 for line in section.lines() {
152 if let Some(hunk) = parse_hunk_header(line) {
153 if let Some(done) = current.replace(hunk) {
154 hunks.push(done);
155 }
156 continue;
157 }
158 if let Some(hunk) = current.as_mut() {
159 match line.as_bytes().first() {
162 Some(b' ') => hunk.lines.push(DiffLine::Context(line[1..].to_string())),
163 Some(b'+') => hunk.lines.push(DiffLine::Added(line[1..].to_string())),
164 Some(b'-') => hunk.lines.push(DiffLine::Removed(line[1..].to_string())),
165 _ => {}
166 }
167 continue;
168 }
169 if line.starts_with("new file") {
171 kind = ChangeKind::Added;
172 } else if line.starts_with("deleted file") {
173 kind = ChangeKind::Deleted;
174 } else if let Some(p) = line.strip_prefix("rename to ") {
175 rename_to = Some(unquote_git_path(p.trim_end()));
178 } else if let Some(p) = line.strip_prefix("rename from ") {
179 rename_from = Some(unquote_git_path(p.trim_end()));
180 } else if let Some(rest) = line.strip_prefix("+++ ") {
181 new_path = unquote_git_path(rest.trim_end())
185 .strip_prefix("b/")
186 .map(str::to_string);
187 } else if let Some(rest) = line.strip_prefix("--- ") {
188 minus_path = unquote_git_path(rest.trim_end())
189 .strip_prefix("a/")
190 .map(str::to_string);
191 }
192 }
193 if let Some(done) = current.take() {
194 hunks.push(done);
195 }
196
197 let normalize = |p: String| p.replace('\\', "/");
198 let old_path = if rename_to.is_some() {
200 kind = ChangeKind::Renamed;
201 rename_from.map(normalize)
202 } else {
203 None
204 };
205 let path = [rename_to, new_path, minus_path]
210 .into_iter()
211 .flatten()
212 .find(|p| !p.is_empty())
213 .or_else(|| header_b_path(section))?;
214 Some(FileDiff {
215 change: kind,
216 path: normalize(path),
217 old_path,
218 hunks,
219 raw: section.to_string(),
220 })
221}
222
223fn parse_hunk_header(line: &str) -> Option<Hunk> {
226 let rest = line.strip_prefix("@@ ")?;
227 let (ranges, section) = rest.split_once(" @@")?;
228 let mut parts = ranges.split_whitespace();
229 let (old_start, old_lines) = parse_hunk_range(parts.next()?.strip_prefix('-')?);
230 let (new_start, new_lines) = parse_hunk_range(parts.next()?.strip_prefix('+')?);
231 Some(Hunk {
232 old_start,
233 old_lines,
234 new_start,
235 new_lines,
236 section: section.strip_prefix(' ').unwrap_or(section).to_string(),
237 lines: Vec::new(),
238 })
239}
240
/// Parses a `start[,count]` range (sign already removed) into `(start, count)`.
///
/// An omitted count means 1. Any component that is not a valid `usize`
/// becomes 0 rather than causing a failure.
fn parse_hunk_range(range: &str) -> (usize, usize) {
    let number = |text: &str| text.parse::<usize>().unwrap_or(0);
    if let Some((start, count)) = range.split_once(',') {
        (number(start), number(count))
    } else {
        (number(range), 1)
    }
}
248
249fn header_b_path(section: &str) -> Option<String> {
255 let first = section.lines().next()?;
256 let s = first.strip_prefix("diff --git ")?;
257 let path = if let Some(q) = s.rfind("\"b/") {
260 unquote_git_path(&s[q..])
261 .strip_prefix("b/")
262 .unwrap_or("")
263 .to_string()
264 } else {
265 let idx = s.find(" b/")?;
266 s[idx + 1..].strip_prefix("b/").unwrap_or("").to_string()
267 };
268 (!path.is_empty()).then_some(path)
271}
272
/// Decodes a C-style quoted path as emitted by git.
///
/// Input that does not start with `"` is returned unchanged. Otherwise the
/// bytes after the opening quote are decoded up to the closing quote (or the
/// end of input if there is none); anything after the closing quote is
/// ignored. Recognised escapes are `\a \b \t \n \v \f \r`, one to three octal
/// digits, and any other escaped byte, which stands for itself (this covers
/// `\"` and `\\`). The decoded bytes are converted to a `String` lossily.
fn unquote_git_path(s: &str) -> String {
    let quoted = match s.strip_prefix('"') {
        Some(rest) => rest,
        None => return s.to_string(),
    };

    let mut decoded: Vec<u8> = Vec::with_capacity(s.len());
    let mut input = quoted.bytes().peekable();
    while let Some(byte) = input.next() {
        match byte {
            b'"' => break,
            b'\\' => match input.next() {
                // A lone backslash at the very end is kept literally.
                None => decoded.push(b'\\'),
                Some(escaped) => decoded.push(match escaped {
                    b'a' => 0x07,
                    b'b' => 0x08,
                    b't' => b'\t',
                    b'n' => b'\n',
                    b'v' => 0x0b,
                    b'f' => 0x0c,
                    b'r' => b'\r',
                    first @ b'0'..=b'7' => {
                        // Up to two further octal digits belong to this escape.
                        let mut value = u32::from(first - b'0');
                        for _ in 0..2 {
                            match input.next_if(|b| (b'0'..=b'7').contains(b)) {
                                Some(digit) => value = value * 8 + u32::from(digit - b'0'),
                                None => break,
                            }
                        }
                        // Values above 0o377 wrap to their low eight bits.
                        value as u8
                    }
                    literal => literal,
                }),
            },
            plain => decoded.push(plain),
        }
    }
    String::from_utf8_lossy(&decoded).into_owned()
}
328
#[cfg(test)]
mod tests {
    use super::*;

    /// One patch with an added, a modified, a deleted and a renamed file must
    /// yield four entries with the matching kinds, in input order.
    #[test]
    fn diff_covers_add_modify_delete_rename() {
        let patch = concat!(
            "diff --git a/new b/new\n",
            "new file mode 100644\n--- /dev/null\n+++ b/new\n@@ -0,0 +1 @@\n+n\n",
            "diff --git a/mod b/mod\n",
            "--- a/mod\n+++ b/mod\n@@ -1 +1 @@\n-a\n+b\n",
            "diff --git a/gone b/gone\n",
            "deleted file mode 100644\n--- a/gone\n+++ /dev/null\n@@ -1 +0,0 @@\n-x\n",
            "diff --git a/old/f.txt b/new/f.txt\n",
            "similarity index 100%\nrename from old/f.txt\nrename to new/f.txt\n",
        );
        let parsed = parse_diff(patch);

        let summary: Vec<(&str, ChangeKind)> = parsed
            .iter()
            .map(|file| (file.path.as_str(), file.change))
            .collect();
        let expected = vec![
            ("new", ChangeKind::Added),
            ("mod", ChangeKind::Modified),
            ("gone", ChangeKind::Deleted),
            ("new/f.txt", ChangeKind::Renamed),
        ];
        assert_eq!(summary, expected);

        let renamed = parsed
            .iter()
            .find(|file| file.change == ChangeKind::Renamed)
            .unwrap();
        assert_eq!(renamed.old_path.as_deref(), Some("old/f.txt"));
    }

    /// Paths with spaces (and the trailing tab on `---`/`+++` lines) resolve
    /// to the full path.
    #[test]
    fn diff_handles_space_paths() {
        let patch = "diff --git a/a b/c.txt b/a b/c.txt\n--- a/a b/c.txt\t\n+++ b/a b/c.txt\t\n@@ -1 +1 @@\n-x\n+y\n";
        let parsed = parse_diff(patch);
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].path, "a b/c.txt");
    }

    /// Octal-escaped non-ASCII names on `---`/`+++` lines are decoded.
    #[test]
    fn diff_unquotes_non_ascii_modify() {
        let patch = concat!(
            "diff --git \"a/caf\\303\\251.txt\" \"b/caf\\303\\251.txt\"\n",
            "index 45b983b..b023018 100644\n",
            "--- \"a/caf\\303\\251.txt\"\n",
            "+++ \"b/caf\\303\\251.txt\"\n",
            "@@ -1 +1 @@\n-hi\n+bye\n",
        );
        let parsed = parse_diff(patch);
        assert_eq!(parsed.len(), 1, "the non-ASCII file must not be dropped");
        assert_eq!(parsed[0].path, "café.txt");
        assert_eq!(parsed[0].change, ChangeKind::Modified);
    }

    /// Octal-escaped names on `rename from`/`rename to` lines are decoded.
    #[test]
    fn diff_unquotes_non_ascii_rename() {
        let patch = concat!(
            "diff --git \"a/caf\\303\\251.txt\" \"b/r\\303\\251sum\\303\\251.txt\"\n",
            "similarity index 100%\n",
            "rename from \"caf\\303\\251.txt\"\n",
            "rename to \"r\\303\\251sum\\303\\251.txt\"\n",
        );
        let parsed = parse_diff(patch);
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].path, "résumé.txt");
        assert_eq!(parsed[0].change, ChangeKind::Renamed);
        assert_eq!(parsed[0].old_path.as_deref(), Some("café.txt"));
    }

    /// With no `---`/`+++` lines the quoted `diff --git` header supplies the
    /// path.
    #[test]
    fn diff_unquotes_quoted_header_fallback() {
        let patch = concat!(
            "diff --git \"a/caf\\303\\251.bin\" \"b/caf\\303\\251.bin\"\n",
            "index 0000000..1111111 100644\n",
            "Binary files \"a/caf\\303\\251.bin\" and \"b/caf\\303\\251.bin\" differ\n",
        );
        let parsed = parse_diff(patch);
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].path, "café.bin");
    }

    /// A `\t` escape inside a quoted path becomes a real tab.
    #[test]
    fn diff_unquotes_escaped_tab_path() {
        let patch = "diff --git \"a/a\\tb.txt\" \"b/a\\tb.txt\"\n--- \"a/a\\tb.txt\"\n+++ \"b/a\\tb.txt\"\n@@ -1 +1 @@\n-x\n+y\n";
        let parsed = parse_diff(patch);
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].path, "a\tb.txt");
    }

    /// Direct checks of the decoder: plain input, octal, tab, backslash, quote.
    #[test]
    fn unquote_git_path_decodes_escapes_and_passes_through_plain() {
        assert_eq!(unquote_git_path("b/plain.txt"), "b/plain.txt");
        assert_eq!(unquote_git_path("\"b/caf\\303\\251.txt\""), "b/café.txt");
        assert_eq!(unquote_git_path("\"a\\tb\""), "a\tb");
        assert_eq!(unquote_git_path("\"a\\\\b\""), "a\\b");
        assert_eq!(unquote_git_path("\"a\\\"b\""), "a\"b");
    }

    /// Sections without any usable path are dropped; an empty `+++ b/` path
    /// or a mode-only section falls back to the header path.
    #[test]
    fn diff_drops_sections_with_no_resolvable_path() {
        let bad = "diff --git a/x b/\nbinary files differ\n";
        assert!(parse_diff(bad).is_empty());

        let recover = "diff --git a/real.txt b/real.txt\n+++ b/\nbinary files differ\n";
        let recovered = parse_diff(recover);
        assert_eq!(recovered.len(), 1);
        assert_eq!(recovered[0].path, "real.txt");

        let mode_only = "diff --git a/f.sh b/f.sh\nold mode 100644\nnew mode 100755\n";
        let mode_change = parse_diff(mode_only);
        assert_eq!(mode_change.len(), 1);
        assert_eq!(mode_change[0].path, "f.sh");
    }

    /// Hunk ranges, section text and body lines are captured, and `raw` is
    /// the untouched section text.
    #[test]
    fn diff_parses_hunk_ranges_and_body() {
        let patch = "diff --git a/f b/f\n--- a/f\n+++ b/f\n@@ -1,2 +1,3 @@ fn main()\n ctx\n-old\n+new\n+added\n";
        let parsed = parse_diff(patch);
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].raw, patch);

        let hunk = &parsed[0].hunks[0];
        let ranges = (
            hunk.old_start,
            hunk.old_lines,
            hunk.new_start,
            hunk.new_lines,
        );
        assert_eq!(ranges, (1, 2, 1, 3));
        assert_eq!(hunk.section, "fn main()");

        let expected_body = vec![
            DiffLine::Context("ctx".into()),
            DiffLine::Removed("old".into()),
            DiffLine::Added("new".into()),
            DiffLine::Added("added".into()),
        ];
        assert_eq!(hunk.lines, expected_body);
    }

    /// `@@ -3 +3 @@` (no counts) means one line on each side.
    #[test]
    fn diff_omitted_count_defaults_to_one() {
        let patch = "diff --git a/f b/f\n--- a/f\n+++ b/f\n@@ -3 +3 @@\n-a\n+b\n";
        let parsed = parse_diff(patch);
        let hunk = &parsed[0].hunks[0];
        assert_eq!((hunk.old_start, hunk.old_lines), (3, 1));
        assert_eq!((hunk.new_start, hunk.new_lines), (3, 1));
    }
}
500
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    /// Generates one newline-terminated line: either a fixed diff-like line
    /// or a short random body line with an optional `-`, `+` or space marker.
    fn diff_line() -> impl Strategy<Value = String> {
        prop_oneof![
            Just("diff --git a/f b/f\n".to_string()),
            Just("--- a/f\n".to_string()),
            Just("+++ b/f\n".to_string()),
            Just("@@ -1,2 +3,4 @@ ctx\n".to_string()),
            Just("@@ -1 +1 @@\n".to_string()),
            Just("new file mode 100644\n".to_string()),
            Just("deleted file mode 100644\n".to_string()),
            Just("rename from {old => new}.rs\n".to_string()),
            Just("rename to é/r.rs\n".to_string()),
            "[-+ ]?[a-zé\t]{0,12}\n",
        ]
    }

    /// Generates a document of up to 39 generated lines, concatenated.
    fn diff_doc() -> impl Strategy<Value = String> {
        prop::collection::vec(diff_line(), 0..40).prop_map(|lines| lines.concat())
    }

    proptest! {
        /// Completely arbitrary text must never make the parser panic.
        #[test]
        fn parse_diff_never_panics_on_arbitrary_text(s in any::<String>()) {
            let _ = parse_diff(&s);
        }

        /// Diff-shaped but randomly ordered text must never make it panic.
        #[test]
        fn parse_diff_never_panics_on_structured_text(s in diff_doc()) {
            let _ = parse_diff(&s);
        }

        /// Every returned entry's raw text starts at a `diff --git` header.
        #[test]
        fn parse_diff_sections_are_well_formed(s in diff_doc()) {
            for parsed in parse_diff(&s) {
                prop_assert!(parsed.raw.starts_with("diff --git"));
            }
        }
    }
}
554
#[cfg(all(test, feature = "serde"))]
mod serde_tests {
    use super::*;

    /// `DiffStat` serializes as an object keyed by field name, and a
    /// `ChangeKind` variant serializes as its name.
    #[test]
    fn diff_stat_and_change_kind_serialize() {
        let stat_json = serde_json::to_value(DiffStat::new(3, 12, 4)).unwrap();
        assert_eq!(
            stat_json,
            serde_json::json!({"files_changed": 3, "insertions": 12, "deletions": 4})
        );

        let kind_json = serde_json::to_value(ChangeKind::Renamed).unwrap();
        assert_eq!(kind_json, serde_json::json!("Renamed"));
    }
}