1use crate::ApplyPatchArgs;
26use std::path::Path;
27use std::path::PathBuf;
28
29use thiserror::Error;
30
/// Required first line of a patch (see `check_start_and_end_lines_strict`).
const BEGIN_PATCH_MARKER: &str = "*** Begin Patch";
/// Required last line of a patch (see `check_start_and_end_lines_strict`).
const END_PATCH_MARKER: &str = "*** End Patch";
/// Hunk header prefix for creating a file; the rest of the line is the path.
const ADD_FILE_MARKER: &str = "*** Add File: ";
/// Hunk header prefix for deleting a file; the rest of the line is the path.
const DELETE_FILE_MARKER: &str = "*** Delete File: ";
/// Hunk header prefix for updating a file; the rest of the line is the path.
const UPDATE_FILE_MARKER: &str = "*** Update File: ";
/// Prefix of the optional line directly after an update header; the rest of
/// the line is stored as the hunk's `move_path`.
const MOVE_TO_MARKER: &str = "*** Move to: ";
/// Line that ends an update chunk and sets its `is_end_of_file` flag.
const EOF_MARKER: &str = "*** End of File";
/// Prefix of a chunk marker that carries context; the text after the prefix
/// becomes the chunk's `change_context`.
const CHANGE_CONTEXT_MARKER: &str = "@@ ";
/// A chunk marker line with no context text (the whole line is exactly `@@`).
const EMPTY_CHANGE_CONTEXT_MARKER: &str = "@@";

/// Selects the mode used by `parse_patch`: `true` picks `ParseMode::Strict`,
/// `false` picks `ParseMode::Lenient`.
const PARSE_IN_STRICT_MODE: bool = false;
48
/// Errors produced while parsing a patch.
#[derive(Debug, PartialEq, Error, Clone)]
pub enum ParseError {
    /// The patch as a whole is malformed (for example, a missing begin or end
    /// marker). The payload is the human-readable description.
    #[error("invalid patch: {0}")]
    InvalidPatchError(String),
    /// A single hunk is malformed. `line_number` is the line the parser
    /// attributes the problem to, where the `*** Begin Patch` line counts as
    /// line 1.
    #[error("invalid hunk at line {line_number}, {message}")]
    InvalidHunkError { message: String, line_number: usize },
}
56use ParseError::*;
57
/// One file-level operation parsed from a patch.
#[derive(Debug, PartialEq, Clone)]
// Every variant name ends in `File`, which this lint would presumably flag.
#[allow(clippy::enum_variant_names)]
pub enum Hunk {
    /// Parsed from an `*** Add File: ` header.
    AddFile {
        /// Path text taken from the header line.
        path: PathBuf,
        /// The `+`-prefixed lines that followed the header, with the `+`
        /// removed and a `\n` appended to each.
        contents: String,
    },
    /// Parsed from a `*** Delete File: ` header.
    DeleteFile {
        /// Path text taken from the header line.
        path: PathBuf,
    },
    /// Parsed from an `*** Update File: ` header.
    UpdateFile {
        /// Path text taken from the header line.
        path: PathBuf,
        /// Path from an optional `*** Move to: ` line placed directly after
        /// the header; `None` when that line is absent.
        move_path: Option<PathBuf>,

        /// The parsed chunks. The parser rejects an update hunk with no
        /// chunks, so a parsed value always holds at least one.
        chunks: Vec<UpdateFileChunk>,
    },
}
77
78impl Hunk {
79 pub fn resolve_path(&self, cwd: &Path) -> PathBuf {
80 match self {
81 Hunk::AddFile { path, .. } => cwd.join(path),
82 Hunk::DeleteFile { path } => cwd.join(path),
83 Hunk::UpdateFile { path, .. } => cwd.join(path),
84 }
85 }
86}
87
88use Hunk::*;
89
/// One contiguous change inside an `*** Update File: ` hunk, as produced by
/// `parse_update_file_chunk`.
#[derive(Debug, PartialEq, Clone)]
pub struct UpdateFileChunk {
    /// Text that followed `@@ ` on the chunk's marker line. `None` when the
    /// marker was a bare `@@` or when the chunk had no marker line.
    pub change_context: Option<String>,

    /// Context lines (` ` prefix), removed lines (`-` prefix) and empty
    /// lines, in patch order, with the one-character prefix stripped.
    pub old_lines: Vec<String>,
    /// Context lines (` ` prefix), added lines (`+` prefix) and empty lines,
    /// in patch order, with the one-character prefix stripped.
    pub new_lines: Vec<String>,

    /// `true` when the chunk was terminated by an `*** End of File` line.
    pub is_end_of_file: bool,
}
105
106pub fn parse_patch(patch: &str) -> Result<ApplyPatchArgs, ParseError> {
107 let mode = if PARSE_IN_STRICT_MODE {
108 ParseMode::Strict
109 } else {
110 ParseMode::Lenient
111 };
112 parse_patch_text(patch, mode)
113}
114
/// How `parse_patch_text` reacts when the begin/end marker check fails.
enum ParseMode {
    /// The marker-check error is returned unchanged.
    Strict,

    /// The marker check is retried via `check_patch_boundaries_lenient`, which
    /// accepts a patch wrapped in a `<<EOF`, `<<'EOF'` or `<<"EOF"` opening
    /// line and a closing line ending in `EOF`.
    Lenient,
}
153
154fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<ApplyPatchArgs, ParseError> {
155 let lines: Vec<&str> = patch.trim().lines().collect();
156 let lines: &[&str] = match check_patch_boundaries_strict(&lines) {
157 Ok(()) => &lines,
158 Err(e) => match mode {
159 ParseMode::Strict => {
160 return Err(e);
161 }
162 ParseMode::Lenient => check_patch_boundaries_lenient(&lines, e)?,
163 },
164 };
165
166 let mut hunks: Vec<Hunk> = Vec::new();
167 let last_line_index = lines.len().saturating_sub(1);
169 let mut remaining_lines = &lines[1..last_line_index];
170 let mut line_number = 2;
171 while !remaining_lines.is_empty() {
172 let (hunk, hunk_lines) = parse_one_hunk(remaining_lines, line_number)?;
173 hunks.push(hunk);
174 line_number += hunk_lines;
175 remaining_lines = &remaining_lines[hunk_lines..]
176 }
177 let patch = lines.join("\n");
178 Ok(ApplyPatchArgs { hunks, patch })
179}
180
181fn check_patch_boundaries_strict(lines: &[&str]) -> Result<(), ParseError> {
184 let (first_line, last_line) = match lines {
185 [] => (None, None),
186 [first] => (Some(first), Some(first)),
187 [first, .., last] => (Some(first), Some(last)),
188 };
189 check_start_and_end_lines_strict(first_line, last_line)
190}
191
192fn check_patch_boundaries_lenient<'a>(
200 original_lines: &'a [&'a str],
201 original_parse_error: ParseError,
202) -> Result<&'a [&'a str], ParseError> {
203 match original_lines {
204 [first, .., last] => {
205 if (first == &"<<EOF" || first == &"<<'EOF'" || first == &"<<\"EOF\"")
206 && last.ends_with("EOF")
207 && original_lines.len() >= 4
208 {
209 let inner_lines = &original_lines[1..original_lines.len() - 1];
210 match check_patch_boundaries_strict(inner_lines) {
211 Ok(()) => Ok(inner_lines),
212 Err(e) => Err(e),
213 }
214 } else {
215 Err(original_parse_error)
216 }
217 }
218 _ => Err(original_parse_error),
219 }
220}
221
222fn check_start_and_end_lines_strict(
223 first_line: Option<&&str>,
224 last_line: Option<&&str>,
225) -> Result<(), ParseError> {
226 match (first_line, last_line) {
227 (Some(&first), Some(&last)) if first == BEGIN_PATCH_MARKER && last == END_PATCH_MARKER => {
228 Ok(())
229 }
230 (Some(&first), _) if first != BEGIN_PATCH_MARKER => Err(InvalidPatchError(String::from(
231 "The first line of the patch must be '*** Begin Patch'",
232 ))),
233 _ => Err(InvalidPatchError(String::from(
234 "The last line of the patch must be '*** End Patch'",
235 ))),
236 }
237}
238
239fn parse_one_hunk(lines: &[&str], line_number: usize) -> Result<(Hunk, usize), ParseError> {
242 let first_line = lines[0].trim();
244 if let Some(path) = first_line.strip_prefix(ADD_FILE_MARKER) {
245 let mut contents = String::new();
247 let mut parsed_lines = 1;
248 for add_line in &lines[1..] {
249 if let Some(line_to_add) = add_line.strip_prefix('+') {
250 contents.push_str(line_to_add);
251 contents.push('\n');
252 parsed_lines += 1;
253 } else {
254 break;
255 }
256 }
257 return Ok((
258 AddFile {
259 path: PathBuf::from(path),
260 contents,
261 },
262 parsed_lines,
263 ));
264 } else if let Some(path) = first_line.strip_prefix(DELETE_FILE_MARKER) {
265 return Ok((
267 DeleteFile {
268 path: PathBuf::from(path),
269 },
270 1,
271 ));
272 } else if let Some(path) = first_line.strip_prefix(UPDATE_FILE_MARKER) {
273 let mut remaining_lines = &lines[1..];
275 let mut parsed_lines = 1;
276
277 let move_path = remaining_lines
279 .first()
280 .and_then(|x| x.strip_prefix(MOVE_TO_MARKER));
281
282 if move_path.is_some() {
283 remaining_lines = &remaining_lines[1..];
284 parsed_lines += 1;
285 }
286
287 let mut chunks = Vec::new();
288 while !remaining_lines.is_empty() {
290 if remaining_lines[0].trim().is_empty() {
292 parsed_lines += 1;
293 remaining_lines = &remaining_lines[1..];
294 continue;
295 }
296
297 if remaining_lines[0].starts_with("***") {
298 break;
299 }
300
301 let (chunk, chunk_lines) = parse_update_file_chunk(
302 remaining_lines,
303 line_number + parsed_lines,
304 chunks.is_empty(),
305 )?;
306 chunks.push(chunk);
307 parsed_lines += chunk_lines;
308 remaining_lines = &remaining_lines[chunk_lines..]
309 }
310
311 if chunks.is_empty() {
312 return Err(InvalidHunkError {
313 message: format!("Update file hunk for path '{path}' is empty"),
314 line_number,
315 });
316 }
317
318 return Ok((
319 UpdateFile {
320 path: PathBuf::from(path),
321 move_path: move_path.map(PathBuf::from),
322 chunks,
323 },
324 parsed_lines,
325 ));
326 }
327
328 Err(InvalidHunkError {
329 message: format!(
330 "'{first_line}' is not a valid hunk header. Valid hunk headers: '*** Add File: {{path}}', '*** Delete File: {{path}}', '*** Update File: {{path}}'"
331 ),
332 line_number,
333 })
334}
335
336fn parse_update_file_chunk(
337 lines: &[&str],
338 line_number: usize,
339 allow_missing_context: bool,
340) -> Result<(UpdateFileChunk, usize), ParseError> {
341 if lines.is_empty() {
342 return Err(InvalidHunkError {
343 message: "Update hunk does not contain any lines".to_string(),
344 line_number,
345 });
346 }
347 let (change_context, start_index) = if lines[0] == EMPTY_CHANGE_CONTEXT_MARKER {
350 (None, 1)
351 } else if let Some(context) = lines[0].strip_prefix(CHANGE_CONTEXT_MARKER) {
352 (Some(context.to_string()), 1)
353 } else {
354 if !allow_missing_context {
355 return Err(InvalidHunkError {
356 message: format!(
357 "Expected update hunk to start with a @@ context marker, got: '{}'",
358 lines[0]
359 ),
360 line_number,
361 });
362 }
363 (None, 0)
364 };
365 if start_index >= lines.len() {
366 return Err(InvalidHunkError {
367 message: "Update hunk does not contain any lines".to_string(),
368 line_number: line_number + 1,
369 });
370 }
371 let mut chunk = UpdateFileChunk {
372 change_context,
373 old_lines: Vec::new(),
374 new_lines: Vec::new(),
375 is_end_of_file: false,
376 };
377 let mut parsed_lines = 0;
378 for line in &lines[start_index..] {
379 match *line {
380 EOF_MARKER => {
381 if parsed_lines == 0 {
382 return Err(InvalidHunkError {
383 message: "Update hunk does not contain any lines".to_string(),
384 line_number: line_number + 1,
385 });
386 }
387 chunk.is_end_of_file = true;
388 parsed_lines += 1;
389 break;
390 }
391 line_contents => {
392 match line_contents.chars().next() {
393 None => {
394 chunk.old_lines.push(String::new());
396 chunk.new_lines.push(String::new());
397 }
398 Some(' ') => {
399 chunk.old_lines.push(line_contents[1..].to_string());
400 chunk.new_lines.push(line_contents[1..].to_string());
401 }
402 Some('+') => {
403 chunk.new_lines.push(line_contents[1..].to_string());
404 }
405 Some('-') => {
406 chunk.old_lines.push(line_contents[1..].to_string());
407 }
408 _ => {
409 if parsed_lines == 0 {
410 return Err(InvalidHunkError {
411 message: format!(
412 "Unexpected line found in update hunk: '{line_contents}'. Every line should start with ' ' (context line), '+' (added line), or '-' (removed line)"
413 ),
414 line_number: line_number + 1,
415 });
416 }
417 break;
419 }
420 }
421 parsed_lines += 1;
422 }
423 }
424 }
425
426 Ok((chunk, parsed_lines + start_index))
427}
428
/// Exercises `parse_patch_text` in strict mode: boundary errors, an empty
/// update hunk, an empty patch, and several well-formed patches.
#[test]
fn test_parse_patch() {
    // Text that does not open with the begin marker is rejected.
    assert_eq!(
        parse_patch_text("bad", ParseMode::Strict),
        Err(InvalidPatchError(
            "The first line of the patch must be '*** Begin Patch'".to_string()
        ))
    );
    // A correct begin marker without the end marker is rejected.
    assert_eq!(
        parse_patch_text("*** Begin Patch\nbad", ParseMode::Strict),
        Err(InvalidPatchError(
            "The last line of the patch must be '*** End Patch'".to_string()
        ))
    );
    // An update hunk with no chunks is an error attributed to its header line.
    assert_eq!(
        parse_patch_text(
            "*** Begin Patch\n\
             *** Update File: test.py\n\
             *** End Patch",
            ParseMode::Strict
        ),
        Err(InvalidHunkError {
            message: "Update file hunk for path 'test.py' is empty".to_string(),
            line_number: 2,
        })
    );
    // A patch made of only the two markers parses to zero hunks.
    assert_eq!(
        parse_patch_text(
            "*** Begin Patch\n\
             *** End Patch",
            ParseMode::Strict
        )
        .unwrap()
        .hunks,
        Vec::new()
    );
    // One hunk of each kind, including an update with a move target.
    assert_eq!(
        parse_patch_text(
            "*** Begin Patch\n\
             *** Add File: path/add.py\n\
             +abc\n\
             +def\n\
             *** Delete File: path/delete.py\n\
             *** Update File: path/update.py\n\
             *** Move to: path/update2.py\n\
             @@ def f():\n\
             - pass\n\
             + return 123\n\
             *** End Patch",
            ParseMode::Strict
        )
        .unwrap()
        .hunks,
        vec![
            AddFile {
                path: PathBuf::from("path/add.py"),
                contents: "abc\ndef\n".to_string()
            },
            DeleteFile {
                path: PathBuf::from("path/delete.py")
            },
            UpdateFile {
                path: PathBuf::from("path/update.py"),
                move_path: Some(PathBuf::from("path/update2.py")),
                chunks: vec![UpdateFileChunk {
                    change_context: Some("def f():".to_string()),
                    old_lines: vec![" pass".to_string()],
                    new_lines: vec![" return 123".to_string()],
                    is_end_of_file: false
                }]
            }
        ]
    );
    // An update hunk ends where the next `***` hunk header begins.
    assert_eq!(
        parse_patch_text(
            "*** Begin Patch\n\
             *** Update File: file.py\n\
             @@\n\
             +line\n\
             *** Add File: other.py\n\
             +content\n\
             *** End Patch",
            ParseMode::Strict
        )
        .unwrap()
        .hunks,
        vec![
            UpdateFile {
                path: PathBuf::from("file.py"),
                move_path: None,
                chunks: vec![UpdateFileChunk {
                    change_context: None,
                    old_lines: vec![],
                    new_lines: vec!["line".to_string()],
                    is_end_of_file: false
                }],
            },
            AddFile {
                path: PathBuf::from("other.py"),
                contents: "content\n".to_string()
            }
        ]
    );

    // The first chunk of an update hunk may omit the `@@` marker line.
    assert_eq!(
        parse_patch_text(
            r#"*** Begin Patch
*** Update File: file2.py
 import foo
+bar
*** End Patch"#,
            ParseMode::Strict
        )
        .unwrap()
        .hunks,
        vec![UpdateFile {
            path: PathBuf::from("file2.py"),
            move_path: None,
            chunks: vec![UpdateFileChunk {
                change_context: None,
                old_lines: vec!["import foo".to_string()],
                new_lines: vec!["import foo".to_string(), "bar".to_string()],
                is_end_of_file: false,
            }],
        }]
    );
}
559
/// Checks that lenient mode accepts a patch wrapped in a heredoc while strict
/// mode rejects it, and that malformed wrappers still fail in both modes.
#[test]
fn test_parse_patch_lenient() {
    let patch_text = r#"*** Begin Patch
*** Update File: file2.py
 import foo
+bar
*** End Patch"#;
    let expected_patch = vec![UpdateFile {
        path: PathBuf::from("file2.py"),
        move_path: None,
        chunks: vec![UpdateFileChunk {
            change_context: None,
            old_lines: vec!["import foo".to_string()],
            new_lines: vec!["import foo".to_string(), "bar".to_string()],
            is_end_of_file: false,
        }],
    }];
    let expected_error =
        InvalidPatchError("The first line of the patch must be '*** Begin Patch'".to_string());

    // Unquoted heredoc: strict rejects it; lenient drops the wrapper, and the
    // returned `patch` is the inner text only.
    let patch_text_in_heredoc = format!("<<EOF\n{patch_text}\nEOF\n");
    assert_eq!(
        parse_patch_text(&patch_text_in_heredoc, ParseMode::Strict),
        Err(expected_error.clone())
    );
    assert_eq!(
        parse_patch_text(&patch_text_in_heredoc, ParseMode::Lenient),
        Ok(ApplyPatchArgs {
            hunks: expected_patch.clone(),
            patch: patch_text.to_string()
        })
    );

    // Single-quoted heredoc delimiter.
    let patch_text_in_single_quoted_heredoc = format!("<<'EOF'\n{patch_text}\nEOF\n");
    assert_eq!(
        parse_patch_text(&patch_text_in_single_quoted_heredoc, ParseMode::Strict),
        Err(expected_error.clone())
    );
    assert_eq!(
        parse_patch_text(&patch_text_in_single_quoted_heredoc, ParseMode::Lenient),
        Ok(ApplyPatchArgs {
            hunks: expected_patch.clone(),
            patch: patch_text.to_string()
        })
    );

    // Double-quoted heredoc delimiter.
    let patch_text_in_double_quoted_heredoc = format!("<<\"EOF\"\n{patch_text}\nEOF\n");
    assert_eq!(
        parse_patch_text(&patch_text_in_double_quoted_heredoc, ParseMode::Strict),
        Err(expected_error.clone())
    );
    assert_eq!(
        parse_patch_text(&patch_text_in_double_quoted_heredoc, ParseMode::Lenient),
        Ok(ApplyPatchArgs {
            hunks: expected_patch.clone(),
            patch: patch_text.to_string()
        })
    );

    // Mismatched quotes are not a recognised opener, so lenient mode returns
    // the original strict error.
    let patch_text_in_mismatched_quotes_heredoc = format!("<<\"EOF'\n{patch_text}\nEOF\n");
    assert_eq!(
        parse_patch_text(&patch_text_in_mismatched_quotes_heredoc, ParseMode::Strict),
        Err(expected_error.clone())
    );
    assert_eq!(
        parse_patch_text(&patch_text_in_mismatched_quotes_heredoc, ParseMode::Lenient),
        Err(expected_error.clone())
    );

    // The heredoc wrapper is recognised, but the wrapped text has no
    // `*** End Patch` line, so lenient mode reports the inner boundary error.
    let patch_text_with_missing_closing_heredoc =
        "<<EOF\n*** Begin Patch\n*** Update File: file2.py\nEOF\n".to_string();
    assert_eq!(
        parse_patch_text(&patch_text_with_missing_closing_heredoc, ParseMode::Strict),
        Err(expected_error.clone())
    );
    assert_eq!(
        parse_patch_text(&patch_text_with_missing_closing_heredoc, ParseMode::Lenient),
        Err(InvalidPatchError(
            "The last line of the patch must be '*** End Patch'".to_string()
        ))
    );
}
642
/// A line matching none of the hunk headers is rejected, and the error
/// carries the caller-supplied line number unchanged.
#[test]
fn test_parse_one_hunk() {
    let expected = InvalidHunkError {
        message: "'bad' is not a valid hunk header. \
                  Valid hunk headers: '*** Add File: {path}', '*** Delete File: {path}', '*** Update File: {path}'".to_string(),
        line_number: 234,
    };
    let outcome = parse_one_hunk(&["bad"], 234);
    assert_eq!(outcome, Err(expected));
}
655
/// Exercises `parse_update_file_chunk` with a mandatory `@@` marker: the
/// error cases first, then two well-formed chunks.
#[test]
fn test_update_file_chunk() {
    // Marker required but the first line is not a marker.
    assert_eq!(
        parse_update_file_chunk(&["bad"], 123, false),
        Err(InvalidHunkError {
            message: "Expected update hunk to start with a @@ context marker, got: 'bad'"
                .to_string(),
            line_number: 123
        })
    );
    // Marker present but nothing follows it.
    assert_eq!(
        parse_update_file_chunk(&["@@"], 123, false),
        Err(InvalidHunkError {
            message: "Update hunk does not contain any lines".to_string(),
            line_number: 124
        })
    );
    // First body line has an unrecognised prefix.
    assert_eq!(
        parse_update_file_chunk(&["@@", "bad"], 123, false),
        Err(InvalidHunkError {
            message: "Unexpected line found in update hunk: 'bad'. \
                      Every line should start with ' ' (context line), '+' (added line), or '-' (removed line)".to_string(),
            line_number: 124
        })
    );
    // The end-of-file marker alone does not make a chunk body.
    assert_eq!(
        parse_update_file_chunk(&["@@", "*** End of File"], 123, false),
        Err(InvalidHunkError {
            message: "Update hunk does not contain any lines".to_string(),
            line_number: 124
        })
    );
    // Mixed body lines; parsing stops before `*** End Patch`, so six lines
    // are consumed (the marker plus five body lines).
    assert_eq!(
        parse_update_file_chunk(
            &[
                "@@ change_context",
                "",
                " context",
                "-remove",
                "+add",
                " context2",
                "*** End Patch",
            ],
            123,
            false
        ),
        Ok((
            (UpdateFileChunk {
                change_context: Some("change_context".to_string()),
                old_lines: vec![
                    "".to_string(),
                    "context".to_string(),
                    "remove".to_string(),
                    "context2".to_string()
                ],
                new_lines: vec![
                    "".to_string(),
                    "context".to_string(),
                    "add".to_string(),
                    "context2".to_string()
                ],
                is_end_of_file: false
            }),
            6
        ))
    );
    // The end-of-file marker is consumed and sets `is_end_of_file`.
    assert_eq!(
        parse_update_file_chunk(&["@@", "+line", "*** End of File"], 123, false),
        Ok((
            (UpdateFileChunk {
                change_context: None,
                old_lines: vec![],
                new_lines: vec!["line".to_string()],
                is_end_of_file: true
            }),
            3
        ))
    );
}