/// Selects which diff is being requested.
///
/// Nothing in this part of the file consumes this type, so the variant notes
/// below are inferred from the names only.
#[derive(Debug, Clone)]
pub enum DiffSpec {
    /// NOTE(review): presumably the uncommitted working-tree changes; confirm
    /// against the caller.
    WorkingTree,
    /// Carries a revision string. NOTE(review): presumably a revision or range
    /// expression handed to git; confirm against the caller.
    Rev(String),
}
29
/// Three counters summarising a diff.
///
/// The type is `#[non_exhaustive]`, so code outside this crate cannot build it
/// with a struct literal and has to go through [`DiffStat::new`] or `Default`
/// (which yields all zeroes).
///
/// Nothing in this part of the file computes these numbers; the field notes
/// are inferred from the field names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[non_exhaustive]
pub struct DiffStat {
    /// NOTE(review): presumably the number of files touched by the diff.
    pub files_changed: usize,
    /// NOTE(review): presumably the number of added lines.
    pub insertions: usize,
    /// NOTE(review): presumably the number of removed lines.
    pub deletions: usize,
}

impl DiffStat {
    /// Builds a `DiffStat` from its three counters, stored as given.
    pub fn new(files_changed: usize, insertions: usize, deletions: usize) -> Self {
        DiffStat {
            files_changed,
            insertions,
            deletions,
        }
    }
}
56
/// How a file was affected, as classified by `parse_section`.
///
/// `parse_section` starts from [`ChangeKind::Modified`] and switches to another
/// variant when it meets the matching extended-header line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[non_exhaustive]
pub enum ChangeKind {
    /// The section had a `new file` header line.
    Added,
    /// The default when no other header line applies.
    Modified,
    /// The section had a `deleted file` header line.
    Deleted,
    /// The section had a `rename to` header line.
    Renamed,
}
71
/// A single body line of a hunk.
///
/// The payload is the text of the line with its one-character marker (` `,
/// `+` or `-`) removed and without the line terminator.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[non_exhaustive]
pub enum DiffLine {
    /// A line that started with a space.
    Context(String),
    /// A line that started with `+`.
    Added(String),
    /// A line that started with `-`.
    Removed(String),
}
87
88#[derive(Debug, Clone, PartialEq, Eq)]
90#[cfg_attr(feature = "serde", derive(serde::Serialize))]
91#[non_exhaustive]
92pub struct Hunk {
93 pub old_start: usize,
95 pub old_lines: usize,
97 pub new_start: usize,
99 pub new_lines: usize,
101 pub section: String,
103 pub lines: Vec<DiffLine>,
105}
106
107#[derive(Debug, Clone, PartialEq, Eq)]
110#[cfg_attr(feature = "serde", derive(serde::Serialize))]
111#[non_exhaustive]
112pub struct FileDiff {
113 pub change: ChangeKind,
115 pub path: String,
117 pub old_path: Option<String>,
119 pub hunks: Vec<Hunk>,
121 pub raw: String,
124}
125
126pub fn parse_diff(diff: &str) -> Vec<FileDiff> {
134 diff_sections(diff).filter_map(parse_section).collect()
135}
136
/// Splits `full` into per-file sections.
///
/// A section starts at a line beginning with `diff --git ` and runs up to the
/// next such line, or to the end of the input. Anything before the first
/// header is not part of any section, so input without a header yields nothing.
fn diff_sections(full: &str) -> impl Iterator<Item = &str> {
    // Byte offset of every header line, found by tracking the running offset
    // while walking the lines (terminators included, so offsets stay exact).
    let mut cuts: Vec<usize> = full
        .split_inclusive('\n')
        .scan(0usize, |offset, line| {
            let start = *offset;
            *offset += line.len();
            Some((start, line))
        })
        .filter(|(_, line)| line.starts_with("diff --git "))
        .map(|(start, _)| start)
        .collect();
    // Sentinel: the last section ends where the input ends. With no headers
    // this leaves a single cut and therefore no windows.
    cuts.push(full.len());

    let sections: Vec<&str> = cuts
        .windows(2)
        .map(|pair| &full[pair[0]..pair[1]])
        .collect();
    sections.into_iter()
}
160
161fn parse_section(section: &str) -> Option<FileDiff> {
164 let mut kind = ChangeKind::Modified;
165 let mut new_path = None;
166 let mut minus_path = None;
167 let mut rename_to = None;
168 let mut rename_from = None;
169 let mut hunks: Vec<Hunk> = Vec::new();
170 let mut current: Option<Hunk> = None;
171
172 for line in section.lines() {
173 if let Some(hunk) = parse_hunk_header(line) {
174 if let Some(done) = current.replace(hunk) {
175 hunks.push(done);
176 }
177 continue;
178 }
179 if let Some(hunk) = current.as_mut() {
180 match line.as_bytes().first() {
183 Some(b' ') => hunk.lines.push(DiffLine::Context(line[1..].to_string())),
184 Some(b'+') => hunk.lines.push(DiffLine::Added(line[1..].to_string())),
185 Some(b'-') => hunk.lines.push(DiffLine::Removed(line[1..].to_string())),
186 _ => {}
187 }
188 continue;
189 }
190 if line.starts_with("new file") {
192 kind = ChangeKind::Added;
193 } else if line.starts_with("deleted file") {
194 kind = ChangeKind::Deleted;
195 } else if let Some(p) = line.strip_prefix("rename to ") {
196 rename_to = Some(unquote_git_path(p.trim_end()));
199 } else if let Some(p) = line.strip_prefix("rename from ") {
200 rename_from = Some(unquote_git_path(p.trim_end()));
201 } else if let Some(rest) = line.strip_prefix("+++ ") {
202 new_path = unquote_git_path(rest.trim_end())
206 .strip_prefix("b/")
207 .map(str::to_string);
208 } else if let Some(rest) = line.strip_prefix("--- ") {
209 minus_path = unquote_git_path(rest.trim_end())
210 .strip_prefix("a/")
211 .map(str::to_string);
212 }
213 }
214 if let Some(done) = current.take() {
215 hunks.push(done);
216 }
217
218 let normalize = |p: String| p.replace('\\', "/");
219 let old_path = if rename_to.is_some() {
221 kind = ChangeKind::Renamed;
222 rename_from.map(normalize)
223 } else {
224 None
225 };
226 let path = [rename_to, new_path, minus_path]
231 .into_iter()
232 .flatten()
233 .find(|p| !p.is_empty())
234 .or_else(|| header_b_path(section))?;
235 Some(FileDiff {
236 change: kind,
237 path: normalize(path),
238 old_path,
239 hunks,
240 raw: section.to_string(),
241 })
242}
243
244fn parse_hunk_header(line: &str) -> Option<Hunk> {
247 let rest = line.strip_prefix("@@ ")?;
248 let (ranges, section) = rest.split_once(" @@")?;
249 let mut parts = ranges.split_whitespace();
250 let (old_start, old_lines) = parse_hunk_range(parts.next()?.strip_prefix('-')?);
251 let (new_start, new_lines) = parse_hunk_range(parts.next()?.strip_prefix('+')?);
252 Some(Hunk {
253 old_start,
254 old_lines,
255 new_start,
256 new_lines,
257 section: section.strip_prefix(' ').unwrap_or(section).to_string(),
258 lines: Vec::new(),
259 })
260}
261
/// Reads a hunk range of the form `start,count` or `start` as `(start, count)`.
///
/// A missing count means `1`. Any number that does not parse as `usize` is
/// read as `0` rather than reported as an error.
fn parse_hunk_range(range: &str) -> (usize, usize) {
    let number = |text: &str| text.parse::<usize>().unwrap_or(0);
    if let Some((start, count)) = range.split_once(',') {
        (number(start), number(count))
    } else {
        (number(range), 1)
    }
}
269
270fn header_b_path(section: &str) -> Option<String> {
276 let first = section.lines().next()?;
277 let s = first.strip_prefix("diff --git ")?;
278 let path = if let Some(q) = s.rfind("\"b/") {
281 unquote_git_path(&s[q..])
282 .strip_prefix("b/")
283 .unwrap_or("")
284 .to_string()
285 } else {
286 let idx = s.find(" b/")?;
287 s[idx + 1..].strip_prefix("b/").unwrap_or("").to_string()
288 };
289 (!path.is_empty()).then_some(path)
292}
293
/// Undoes git's C-style path quoting.
///
/// Input that does not start with `"` is returned unchanged. Otherwise the
/// bytes after the opening quote are decoded up to the closing quote (or the
/// end of input, if there is none):
///
/// * `\a \b \t \n \v \f \r \" \\` become the corresponding single byte;
/// * `\` followed by one to three octal digits becomes that byte value;
/// * `\` followed by any other byte becomes that byte;
/// * a lone `\` at the very end is kept as is.
///
/// The decoded bytes are converted with `String::from_utf8_lossy`, so invalid
/// UTF-8 turns into replacement characters instead of an error.
fn unquote_git_path(s: &str) -> String {
    let quoted = match s.strip_prefix('"') {
        Some(inner) => inner,
        None => return s.to_string(),
    };

    let mut decoded: Vec<u8> = Vec::with_capacity(quoted.len());
    let mut rest = quoted.bytes().peekable();
    while let Some(byte) = rest.next() {
        match byte {
            b'"' => break,
            b'\\' => match rest.next() {
                // Backslash was the last byte: nothing to escape, keep it.
                None => decoded.push(b'\\'),
                Some(escape) => {
                    let value = match escape {
                        b'a' => 0x07,
                        b'b' => 0x08,
                        b't' => b'\t',
                        b'n' => b'\n',
                        b'v' => 0x0b,
                        b'f' => 0x0c,
                        b'r' => b'\r',
                        first @ b'0'..=b'7' => {
                            // Up to two more octal digits after the first.
                            let mut octal = u32::from(first - b'0');
                            for _ in 0..2 {
                                match rest.next_if(|next| (b'0'..=b'7').contains(next)) {
                                    Some(digit) => octal = octal * 8 + u32::from(digit - b'0'),
                                    None => break,
                                }
                            }
                            // Values above 0o377 wrap to their low eight bits.
                            octal as u8
                        }
                        // Covers `\"`, `\\` and any unrecognised escape.
                        other => other,
                    };
                    decoded.push(value);
                }
            },
            plain => decoded.push(plain),
        }
    }
    String::from_utf8_lossy(&decoded).into_owned()
}
349
#[cfg(test)]
mod tests {
    use super::*;

    /// One section of each kind: add, modify, delete, and a hunk-less rename.
    #[test]
    fn diff_covers_add_modify_delete_rename() {
        let patch = concat!(
            "diff --git a/new b/new\n",
            "new file mode 100644\n--- /dev/null\n+++ b/new\n@@ -0,0 +1 @@\n+n\n",
            "diff --git a/mod b/mod\n",
            "--- a/mod\n+++ b/mod\n@@ -1 +1 @@\n-a\n+b\n",
            "diff --git a/gone b/gone\n",
            "deleted file mode 100644\n--- a/gone\n+++ /dev/null\n@@ -1 +0,0 @@\n-x\n",
            "diff --git a/old/f.txt b/new/f.txt\n",
            "similarity index 100%\nrename from old/f.txt\nrename to new/f.txt\n",
        );
        let parsed = parse_diff(patch);

        let summary: Vec<(&str, ChangeKind)> = parsed
            .iter()
            .map(|file| (file.path.as_str(), file.change))
            .collect();
        let expected = vec![
            ("new", ChangeKind::Added),
            ("mod", ChangeKind::Modified),
            ("gone", ChangeKind::Deleted),
            ("new/f.txt", ChangeKind::Renamed),
        ];
        assert_eq!(summary, expected);

        let renamed = parsed
            .iter()
            .find(|file| file.change == ChangeKind::Renamed)
            .unwrap();
        assert_eq!(renamed.old_path.as_deref(), Some("old/f.txt"));
    }

    /// A path containing a space (and " b/") is taken from the `+++` line,
    /// whose trailing tab must be dropped.
    #[test]
    fn diff_handles_space_paths() {
        let patch = "diff --git a/a b/c.txt b/a b/c.txt\n--- a/a b/c.txt\t\n+++ b/a b/c.txt\t\n@@ -1 +1 @@\n-x\n+y\n";
        let parsed = parse_diff(patch);
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].path, "a b/c.txt");
    }

    /// Octal-escaped UTF-8 in quoted `---`/`+++` paths is decoded.
    #[test]
    fn diff_unquotes_non_ascii_modify() {
        let patch = concat!(
            "diff --git \"a/caf\\303\\251.txt\" \"b/caf\\303\\251.txt\"\n",
            "index 45b983b..b023018 100644\n",
            "--- \"a/caf\\303\\251.txt\"\n",
            "+++ \"b/caf\\303\\251.txt\"\n",
            "@@ -1 +1 @@\n-hi\n+bye\n",
        );
        let parsed = parse_diff(patch);
        assert_eq!(parsed.len(), 1, "the non-ASCII file must not be dropped");
        let file = &parsed[0];
        assert_eq!(file.path, "café.txt");
        assert_eq!(file.change, ChangeKind::Modified);
    }

    /// Quoted `rename from` / `rename to` operands are decoded too.
    #[test]
    fn diff_unquotes_non_ascii_rename() {
        let patch = concat!(
            "diff --git \"a/caf\\303\\251.txt\" \"b/r\\303\\251sum\\303\\251.txt\"\n",
            "similarity index 100%\n",
            "rename from \"caf\\303\\251.txt\"\n",
            "rename to \"r\\303\\251sum\\303\\251.txt\"\n",
        );
        let parsed = parse_diff(patch);
        assert_eq!(parsed.len(), 1);
        let file = &parsed[0];
        assert_eq!(file.path, "résumé.txt");
        assert_eq!(file.change, ChangeKind::Renamed);
        assert_eq!(file.old_path.as_deref(), Some("café.txt"));
    }

    /// With no `---`/`+++` lines the path comes from the quoted header.
    #[test]
    fn diff_unquotes_quoted_header_fallback() {
        let patch = concat!(
            "diff --git \"a/caf\\303\\251.bin\" \"b/caf\\303\\251.bin\"\n",
            "index 0000000..1111111 100644\n",
            "Binary files \"a/caf\\303\\251.bin\" and \"b/caf\\303\\251.bin\" differ\n",
        );
        let parsed = parse_diff(patch);
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].path, "café.bin");
    }

    /// A `\t` escape inside a quoted path becomes a real tab.
    #[test]
    fn diff_unquotes_escaped_tab_path() {
        let patch = "diff --git \"a/a\\tb.txt\" \"b/a\\tb.txt\"\n--- \"a/a\\tb.txt\"\n+++ \"b/a\\tb.txt\"\n@@ -1 +1 @@\n-x\n+y\n";
        let parsed = parse_diff(patch);
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].path, "a\tb.txt");
    }

    #[test]
    fn unquote_git_path_decodes_escapes_and_passes_through_plain() {
        // Unquoted input is returned as is.
        assert_eq!(unquote_git_path("b/plain.txt"), "b/plain.txt");
        // Octal escapes form UTF-8 bytes.
        assert_eq!(unquote_git_path("\"b/caf\\303\\251.txt\""), "b/café.txt");
        // Named, backslash and quote escapes.
        assert_eq!(unquote_git_path("\"a\\tb\""), "a\tb");
        assert_eq!(unquote_git_path("\"a\\\\b\""), "a\\b");
        assert_eq!(unquote_git_path("\"a\\\"b\""), "a\"b");
    }

    #[test]
    fn diff_drops_sections_with_no_resolvable_path() {
        // Empty `b/` side and nothing else to go on: the section is dropped.
        let unresolvable = "diff --git a/x b/\nbinary files differ\n";
        assert!(parse_diff(unresolvable).is_empty());

        // An empty `+++ b/` path falls back to the header path.
        let recoverable = "diff --git a/real.txt b/real.txt\n+++ b/\nbinary files differ\n";
        let parsed = parse_diff(recoverable);
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].path, "real.txt");

        // A mode-only change has no path lines at all.
        let mode_only = "diff --git a/f.sh b/f.sh\nold mode 100644\nnew mode 100755\n";
        let parsed = parse_diff(mode_only);
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].path, "f.sh");
    }

    #[test]
    fn diff_parses_hunk_ranges_and_body() {
        let patch = "diff --git a/f b/f\n--- a/f\n+++ b/f\n@@ -1,2 +1,3 @@ fn main()\n ctx\n-old\n+new\n+added\n";
        let parsed = parse_diff(patch);
        assert_eq!(parsed.len(), 1);
        // `raw` keeps the section text verbatim.
        assert_eq!(parsed[0].raw, patch);

        let hunk = &parsed[0].hunks[0];
        assert_eq!((hunk.old_start, hunk.old_lines), (1, 2));
        assert_eq!((hunk.new_start, hunk.new_lines), (1, 3));
        assert_eq!(hunk.section, "fn main()");

        let expected_body = vec![
            DiffLine::Context("ctx".into()),
            DiffLine::Removed("old".into()),
            DiffLine::Added("new".into()),
            DiffLine::Added("added".into()),
        ];
        assert_eq!(hunk.lines, expected_body);
    }

    /// `@@ -3 +3 @@` omits both counts, which then default to one.
    #[test]
    fn diff_omitted_count_defaults_to_one() {
        let patch = "diff --git a/f b/f\n--- a/f\n+++ b/f\n@@ -3 +3 @@\n-a\n+b\n";
        let parsed = parse_diff(patch);
        let hunk = &parsed[0].hunks[0];
        assert_eq!((hunk.old_start, hunk.old_lines), (3, 1));
        assert_eq!((hunk.new_start, hunk.new_lines), (3, 1));
    }
}
521
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    /// One line of diff-like text: a fixed header or hunk line, or a short
    /// random body line with an optional marker.
    fn diff_line() -> impl Strategy<Value = String> {
        prop_oneof![
            Just(String::from("diff --git a/f b/f\n")),
            Just(String::from("--- a/f\n")),
            Just(String::from("+++ b/f\n")),
            Just(String::from("@@ -1,2 +3,4 @@ ctx\n")),
            Just(String::from("@@ -1 +1 @@\n")),
            Just(String::from("new file mode 100644\n")),
            Just(String::from("deleted file mode 100644\n")),
            Just(String::from("rename from {old => new}.rs\n")),
            Just(String::from("rename to é/r.rs\n")),
            "[-+ ]?[a-zé\t]{0,12}\n",
        ]
    }

    /// Up to 39 such lines glued together in arbitrary order.
    fn diff_doc() -> impl Strategy<Value = String> {
        prop::collection::vec(diff_line(), 0..40)
            .prop_map(|pieces: Vec<String>| pieces.into_iter().collect::<String>())
    }

    proptest! {
        /// Completely arbitrary strings must not make the parser panic.
        #[test]
        fn parse_diff_never_panics_on_arbitrary_text(text in any::<String>()) {
            let _ = parse_diff(&text);
        }

        /// Neither must shuffled diff-shaped lines.
        #[test]
        fn parse_diff_never_panics_on_structured_text(doc in diff_doc()) {
            let _ = parse_diff(&doc);
        }

        /// Every returned section begins at a `diff --git` header.
        #[test]
        fn parse_diff_sections_are_well_formed(doc in diff_doc()) {
            for parsed in parse_diff(&doc) {
                prop_assert!(parsed.raw.starts_with("diff --git"));
            }
        }
    }
}
575
#[cfg(all(test, feature = "serde"))]
mod serde_tests {
    use super::*;

    /// Pins the JSON shape: a struct serializes as an object keyed by field
    /// name, a unit variant as its name.
    #[test]
    fn diff_stat_and_change_kind_serialize() {
        let stat_json = serde_json::to_value(DiffStat::new(3, 12, 4)).unwrap();
        assert_eq!(
            stat_json,
            serde_json::json!({"files_changed": 3, "insertions": 12, "deletions": 4})
        );

        let kind_json = serde_json::to_value(ChangeKind::Renamed).unwrap();
        assert_eq!(kind_json, serde_json::json!("Renamed"));
    }
}