/// Prefix of the header line that opens each per-file section of a git diff.
///
/// `split_by_file` treats every line beginning with this text as the start of a new file.
const FILE_DIFF_MARKER: &str = "diff --git a/";

/// Prefix of a hunk header line (for example `@@ -1,3 +1,4 @@`).
///
/// `split_file_by_hunk` treats every line beginning with this text as the start of a new hunk.
const HUNK_MARKER: &str = "@@ ";
8
/// One file's section of a git diff.
///
/// Values produced by `split_by_file` cover the text from a `diff --git a/` header line up to,
/// but not including, the next such header (or the end of the input).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileDiff {
    /// Path parsed from the `diff --git` header line (the `b/` side when one is present).
    pub path: String,
    /// Verbatim text of this file's section, header lines included.
    pub content: String,
    /// Length of `content` in bytes, as populated by `split_by_file`.
    pub byte_len: usize,
}
19
/// A single hunk of a file's diff, paired with the header of the file it belongs to.
///
/// Values produced by `split_file_by_hunk` repeat the same `file_header` for every hunk of a
/// file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HunkDiff {
    /// Everything in the file's section that precedes its first hunk header line.
    pub file_header: String,
    /// Verbatim hunk text, starting at its `@@ ` header line.
    pub content: String,
    /// `file_header.len() + content.len()` in bytes, as populated by `split_file_by_hunk`.
    pub byte_len: usize,
}
30
31pub fn split_by_file(diff: &str) -> Vec<FileDiff> {
36 let mut result = Vec::new();
37 let mut positions = Vec::new();
38
39 if diff.starts_with(FILE_DIFF_MARKER) {
41 positions.push(0);
42 }
43 let search = format!("\n{FILE_DIFF_MARKER}");
44 let mut start = 0;
45 while let Some(pos) = diff[start..].find(&search) {
46 positions.push(start + pos + 1);
48 start = start + pos + 1;
49 }
50
51 for (i, &pos) in positions.iter().enumerate() {
52 let end = positions.get(i + 1).copied().unwrap_or(diff.len());
53 let content = &diff[pos..end];
54 let first_line = content.lines().next().unwrap_or("");
55 let path = extract_path_from_diff_header(first_line);
56
57 result.push(FileDiff {
58 path,
59 content: content.to_string(),
60 byte_len: content.len(),
61 });
62 }
63
64 result
65}
66
67pub fn split_file_by_hunk(file_diff: &FileDiff) -> Vec<HunkDiff> {
73 let content = &file_diff.content;
74 let mut hunk_positions = Vec::new();
75
76 if content.starts_with(HUNK_MARKER) {
78 hunk_positions.push(0);
79 }
80 let search = format!("\n{HUNK_MARKER}");
81 let mut start = 0;
82 while let Some(pos) = content[start..].find(&search) {
83 hunk_positions.push(start + pos + 1);
84 start = start + pos + 1;
85 }
86
87 if hunk_positions.is_empty() {
88 return Vec::new();
89 }
90
91 let file_header = &content[..hunk_positions[0]];
93
94 let mut result = Vec::new();
95 for (i, &pos) in hunk_positions.iter().enumerate() {
96 let end = hunk_positions.get(i + 1).copied().unwrap_or(content.len());
97 let hunk_content = &content[pos..end];
98 let byte_len = file_header.len() + hunk_content.len();
99
100 result.push(HunkDiff {
101 file_header: file_header.to_string(),
102 content: hunk_content.to_string(),
103 byte_len,
104 });
105 }
106
107 result
108}
109
110fn extract_path_from_diff_header(header_line: &str) -> String {
112 if let Some(b_pos) = header_line.rfind(" b/") {
115 header_line[b_pos + 3..].to_string()
116 } else {
117 header_line
119 .strip_prefix(FILE_DIFF_MARKER)
120 .unwrap_or(header_line)
121 .to_string()
122 }
123}
124
#[cfg(test)]
// Lets test code call `unwrap`/`expect` freely.
// NOTE(review): presumably these clippy lints are denied for the rest of the crate — confirm.
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    // ----- fixtures -----

    /// Builds the header lines of a modified file: `diff --git`, `index`, `---` and `+++`.
    fn make_file_header(path: &str) -> String {
        format!(
            "diff --git a/{path} b/{path}\n\
             index abc1234..def5678 100644\n\
             --- a/{path}\n\
             +++ b/{path}\n"
        )
    }

    /// Builds a hunk: an `@@ -a,b +c,d @@` header line followed by `body`.
    fn make_hunk(
        old_start: usize,
        old_count: usize,
        new_start: usize,
        new_count: usize,
        body: &str,
    ) -> String {
        format!("@@ -{old_start},{old_count} +{new_start},{new_count} @@\n{body}")
    }

    /// Builds a complete single-file diff containing exactly one hunk.
    fn make_single_file_diff(path: &str, hunk_body: &str) -> String {
        let mut text = make_file_header(path);
        text.push_str(&make_hunk(1, 3, 1, 4, hunk_body));
        text
    }

    /// Wraps `content` in a `FileDiff` whose `byte_len` is consistent with the content.
    fn file_diff_from(path: &str, content: String) -> FileDiff {
        FileDiff {
            path: path.to_string(),
            byte_len: content.len(),
            content,
        }
    }

    // ----- split_by_file -----

    #[test]
    fn split_by_file_empty_input() {
        assert!(split_by_file("").is_empty());
    }

    #[test]
    fn split_by_file_whitespace_only() {
        assert!(split_by_file(" \n\n \t ").is_empty());
    }

    #[test]
    fn split_by_file_no_diff_markers() {
        assert!(split_by_file("some random text\nwithout diff markers\n").is_empty());
    }

    #[test]
    fn split_by_file_single_file_single_hunk() {
        let diff = make_single_file_diff(
            "src/main.rs",
            " fn main() {\n+ println!(\"hello\");\n }\n",
        );

        let files = split_by_file(&diff);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
        assert_eq!(files[0].content, diff);
    }

    #[test]
    fn split_by_file_single_file_multiple_hunks() {
        let mut diff = make_file_header("lib.rs");
        diff.push_str(&make_hunk(1, 3, 1, 4, "+use std::io;\n"));
        diff.push_str(&make_hunk(10, 2, 11, 3, "+// new comment\n"));

        let files = split_by_file(&diff);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "lib.rs");
        assert!(files[0].content.contains("@@ -1,3 +1,4 @@"));
        assert!(files[0].content.contains("@@ -10,2 +11,3 @@"));
    }

    #[test]
    fn split_by_file_multiple_files() {
        let diff = [
            make_single_file_diff("a.rs", "+line\n"),
            make_single_file_diff("b.rs", "+other\n"),
            make_single_file_diff("c.rs", "+third\n"),
        ]
        .concat();

        let files = split_by_file(&diff);
        assert_eq!(files.len(), 3);
        assert_eq!(files[0].path, "a.rs");
        assert_eq!(files[1].path, "b.rs");
        assert_eq!(files[2].path, "c.rs");
    }

    #[test]
    fn split_by_file_binary_marker() {
        let diff = "diff --git a/image.png b/image.png\n\
                    new file mode 100644\n\
                    index 0000000..abc1234\n\
                    Binary files /dev/null and b/image.png differ\n";

        let files = split_by_file(diff);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "image.png");
        assert!(files[0].content.contains("Binary files"));
    }

    #[test]
    fn split_by_file_rename() {
        let diff = "diff --git a/old_name.rs b/new_name.rs\n\
                    similarity index 95%\n\
                    rename from old_name.rs\n\
                    rename to new_name.rs\n\
                    index abc1234..def5678 100644\n\
                    --- a/old_name.rs\n\
                    +++ b/new_name.rs\n\
                    @@ -1,3 +1,3 @@\n\
                    -// old\n\
                    +// new\n";

        let files = split_by_file(diff);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "new_name.rs");
    }

    #[test]
    fn split_by_file_byte_len_matches_content() {
        let diff = [
            make_single_file_diff("a.rs", "+line\n"),
            make_single_file_diff("b.rs", "+other\n"),
        ]
        .concat();

        for file in &split_by_file(&diff) {
            assert_eq!(file.byte_len, file.content.len());
        }
    }

    #[test]
    fn split_by_file_content_preserved_verbatim() {
        let diff = [
            make_single_file_diff("a.rs", "+line\n"),
            make_single_file_diff("b.rs", "+other\n"),
        ]
        .concat();

        let files = split_by_file(&diff);
        let rejoined: String = files.iter().map(|f| f.content.as_str()).collect();
        assert_eq!(rejoined, diff);
    }

    #[test]
    fn split_by_file_skips_leading_preamble() {
        let file = make_single_file_diff("a.rs", "+line\n");
        let diff = format!("commit 1234567\nAuthor: someone\n\n{file}");

        let files = split_by_file(&diff);
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "a.rs");
        assert_eq!(files[0].content, file);
    }

    // ----- split_file_by_hunk -----

    #[test]
    fn split_file_by_hunk_no_hunks() {
        let file_diff = file_diff_from(
            "image.png",
            "diff --git a/image.png b/image.png\n\
             new file mode 100644\n\
             index 0000000..abc1234\n\
             Binary files /dev/null and b/image.png differ\n"
                .to_string(),
        );

        assert!(split_file_by_hunk(&file_diff).is_empty());
    }

    #[test]
    fn split_file_by_hunk_single_hunk() {
        let file_diff = file_diff_from(
            "main.rs",
            make_single_file_diff("main.rs", " fn main() {\n+ println!(\"hi\");\n }\n"),
        );

        let hunks = split_file_by_hunk(&file_diff);
        assert_eq!(hunks.len(), 1);
        assert!(hunks[0].content.starts_with(HUNK_MARKER));
        assert!(hunks[0].file_header.starts_with("diff --git"));
    }

    #[test]
    fn split_file_by_hunk_multiple_hunks() {
        let mut content = make_file_header("lib.rs");
        content.push_str(&make_hunk(1, 3, 1, 4, "+use std::io;\n"));
        content.push_str(&make_hunk(10, 2, 11, 3, "+// comment\n"));
        content.push_str(&make_hunk(20, 1, 22, 2, "+fn new() {}\n"));

        let hunks = split_file_by_hunk(&file_diff_from("lib.rs", content));
        assert_eq!(hunks.len(), 3);
    }

    /// A two-hunk `lib.rs` diff shared by the per-hunk property tests below.
    fn two_hunk_file_diff() -> FileDiff {
        let mut content = make_file_header("lib.rs");
        content.push_str(&make_hunk(1, 3, 1, 4, "+line\n"));
        content.push_str(&make_hunk(10, 2, 11, 3, "+other\n"));
        file_diff_from("lib.rs", content)
    }

    #[test]
    fn split_file_by_hunk_header_included() {
        for hunk in &split_file_by_hunk(&two_hunk_file_diff()) {
            assert!(
                hunk.file_header.contains("diff --git"),
                "file_header should contain the diff --git line"
            );
            assert!(
                hunk.file_header.contains("--- a/"),
                "file_header should contain the --- line"
            );
            assert!(
                hunk.file_header.contains("+++ b/"),
                "file_header should contain the +++ line"
            );
        }
    }

    #[test]
    fn split_file_by_hunk_content_starts_with_at() {
        for hunk in &split_file_by_hunk(&two_hunk_file_diff()) {
            assert!(
                hunk.content.starts_with(HUNK_MARKER),
                "hunk content should start with '{}', got: {:?}",
                HUNK_MARKER,
                // Char-based preview: byte slicing could panic inside a multi-byte character.
                hunk.content.chars().take(20).collect::<String>()
            );
        }
    }

    #[test]
    fn split_file_by_hunk_byte_len_is_header_plus_content() {
        for hunk in &split_file_by_hunk(&two_hunk_file_diff()) {
            assert_eq!(
                hunk.byte_len,
                hunk.file_header.len() + hunk.content.len(),
                "byte_len should equal file_header.len() + content.len()"
            );
        }
    }

    #[test]
    fn split_file_by_hunk_mode_change_only() {
        let file_diff = file_diff_from(
            "script.sh",
            "diff --git a/script.sh b/script.sh\n\
             old mode 100644\n\
             new mode 100755\n"
                .to_string(),
        );

        assert!(split_file_by_hunk(&file_diff).is_empty());
    }

    #[test]
    fn split_file_by_hunk_without_file_header() {
        let first = make_hunk(1, 3, 1, 4, "+line\n");
        let second = make_hunk(10, 2, 11, 3, "+other\n");
        let file_diff = file_diff_from("lib.rs", format!("{first}{second}"));

        let hunks = split_file_by_hunk(&file_diff);
        assert_eq!(hunks.len(), 2);
        assert!(hunks.iter().all(|h| h.file_header.is_empty()));
        assert_eq!(hunks[0].content, first);
        assert_eq!(hunks[1].content, second);
    }

    // ----- extract_path_from_diff_header -----

    #[test]
    fn path_extraction_simple() {
        assert_eq!(
            extract_path_from_diff_header("diff --git a/foo.rs b/foo.rs"),
            "foo.rs"
        );
    }

    #[test]
    fn path_extraction_nested() {
        assert_eq!(
            extract_path_from_diff_header("diff --git a/src/git/diff.rs b/src/git/diff.rs"),
            "src/git/diff.rs"
        );
    }

    #[test]
    fn path_extraction_rename() {
        assert_eq!(
            extract_path_from_diff_header("diff --git a/old.rs b/new.rs"),
            "new.rs"
        );
    }

    #[test]
    fn path_extraction_with_spaces() {
        assert_eq!(
            extract_path_from_diff_header("diff --git a/my file.rs b/my file.rs"),
            "my file.rs"
        );
    }

    // ----- round trip -----

    #[test]
    fn roundtrip_split_and_rejoin() {
        let original = [
            make_single_file_diff("a.rs", "+line1\n"),
            make_single_file_diff("b.rs", "+line2\n"),
            make_single_file_diff("c.rs", "+line3\n"),
        ]
        .concat();

        let files = split_by_file(&original);
        let rejoined: String = files.iter().map(|f| f.content.as_str()).collect();
        assert_eq!(rejoined, original);
    }
}