1use regex::Regex;
6use std::sync::OnceLock;
7
/// Largest raw patch text, in bytes, that `parse_patch` accepts (1024 * 1024 = 1 MiB).
///
/// The limit is checked against the untrimmed input length.
pub const MAX_PATCH_SIZE: usize = 1024 * 1024;
/// Maximum number of file operations `parse_patch` returns; a patch containing more
/// valid headers than this is rejected with an error.
pub const MAX_HUNKS: usize = 500;
12
/// One file operation parsed from a patch body.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Hunk {
    /// Produced for a `*** Add File:` header.
    Add {
        /// Path taken from the header, with surrounding whitespace trimmed.
        path: String,
        /// The `+`-prefixed lines of the section (prefix removed), joined with `\n`
        /// and without a trailing newline.
        contents: String,
    },
    /// Produced for a `*** Delete File:` header.
    Delete {
        /// Path taken from the header, with surrounding whitespace trimmed.
        path: String,
    },
    /// Produced for a `*** Update File:` header.
    Update {
        /// Path taken from the header, with surrounding whitespace trimmed.
        path: String,
        /// Target path from a `*** Move to:` line directly after the header, if any.
        move_path: Option<String>,
        /// The `@@`-introduced chunks of the section, in input order.
        chunks: Vec<UpdateFileChunk>,
    },
}
28
/// One `@@` chunk inside an `*** Update File:` section.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UpdateFileChunk {
    /// Lines prefixed with ` ` (context) or `-` (removed), prefix stripped, in input order.
    pub old_lines: Vec<String>,
    /// Lines prefixed with ` ` (context) or `+` (added), prefix stripped, in input order.
    pub new_lines: Vec<String>,
    /// Trimmed text following `@@` on the chunk header; `None` when that text is empty.
    pub change_context: Option<String>,
    /// `true` when the chunk was terminated by a line equal to `*** End of File`.
    pub is_end_of_file: bool,
}
36
/// Result of parsing an `*** Add File:`, `*** Delete File:` or `*** Update File:` header.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PatchHeader {
    /// Path named on the header line, with surrounding whitespace trimmed; never empty.
    pub file_path: String,
    /// Target taken from a `*** Move to:` line that directly follows an update header;
    /// `None` when no such line is present (always `None` for add and delete headers).
    pub move_path: Option<String>,
    /// Index of the first line after the header (and after the move line, if one was consumed).
    pub next_idx: usize,
}
43
44pub fn strip_heredoc(input: &str) -> String {
51 static OPEN_RE: OnceLock<Regex> = OnceLock::new();
52 let open_re = OPEN_RE.get_or_init(|| {
53 Regex::new(r#"^(?:cat\s+)?<<['"]?([A-Za-z0-9_]+)['"]?\s*\n"#)
54 .expect("heredoc opening regex should compile")
55 });
56
57 let Some(captures) = open_re.captures(input) else {
58 return input.to_owned();
59 };
60 let Some(opening) = captures.get(0) else {
61 return input.to_owned();
62 };
63 let delimiter = captures
64 .get(1)
65 .expect("heredoc regex has a delimiter capture")
66 .as_str();
67 let rest = &input[opening.end()..];
68
69 for (offset, _) in rest.match_indices('\n') {
70 let after_newline = &rest[offset + 1..];
71 let Some(after_delimiter) = after_newline.strip_prefix(delimiter) else {
72 continue;
73 };
74 if after_delimiter.chars().all(char::is_whitespace) {
75 return rest[..offset].to_owned();
76 }
77 }
78
79 input.to_owned()
80}
81
82pub fn parse_patch_header(lines: &[&str], start_idx: usize) -> Option<PatchHeader> {
84 let line = *lines.get(start_idx)?;
85
86 if let Some(path) = line.strip_prefix("*** Add File:") {
87 let file_path = path.trim();
88 return (!file_path.is_empty()).then(|| PatchHeader {
89 file_path: file_path.to_owned(),
90 move_path: None,
91 next_idx: start_idx + 1,
92 });
93 }
94
95 if let Some(path) = line.strip_prefix("*** Delete File:") {
96 let file_path = path.trim();
97 return (!file_path.is_empty()).then(|| PatchHeader {
98 file_path: file_path.to_owned(),
99 move_path: None,
100 next_idx: start_idx + 1,
101 });
102 }
103
104 if let Some(path) = line.strip_prefix("*** Update File:") {
105 let file_path = path.trim();
106 if file_path.is_empty() {
107 return None;
108 }
109
110 let mut move_path = None;
111 let mut next_idx = start_idx + 1;
112 if let Some(next_line) = lines.get(next_idx) {
113 if let Some(path) = next_line.strip_prefix("*** Move to:") {
114 move_path = Some(path.trim().to_owned());
115 next_idx += 1;
116 }
117 }
118
119 return Some(PatchHeader {
120 file_path: file_path.to_owned(),
121 move_path,
122 next_idx,
123 });
124 }
125
126 None
127}
128
/// Collects the contents of an `*** Add File:` section that starts at `lines[start_idx]`.
///
/// The section runs until the first line beginning with `***` or the end of `lines`.
/// Within it, lines starting with `+` contribute their text (without the `+`); all other
/// lines are ignored. The collected lines are joined with `\n`, with no trailing newline.
///
/// Returns the contents together with the index of the line that ended the section
/// (`start_idx` itself when it is already out of range).
pub fn parse_add_file_content(lines: &[&str], start_idx: usize) -> (String, usize) {
    let remaining = lines.get(start_idx..).unwrap_or(&[]);

    // Everything up to (not including) the next `***` line belongs to this section.
    let section_len = remaining
        .iter()
        .take_while(|line| !line.starts_with("***"))
        .count();

    let added: Vec<&str> = remaining[..section_len]
        .iter()
        .filter_map(|line| line.strip_prefix('+'))
        .collect();

    (added.join("\n"), start_idx + section_len)
}
148
149pub fn parse_update_file_chunks(lines: &[&str], start_idx: usize) -> (Vec<UpdateFileChunk>, usize) {
151 let mut chunks = Vec::new();
152 let mut i = start_idx;
153
154 while i < lines.len() && !lines[i].starts_with("***") {
155 if lines[i].starts_with("@@") {
156 let context_line = lines[i]["@@".len()..].trim();
157 i += 1;
158
159 let mut old_lines = Vec::new();
160 let mut new_lines = Vec::new();
161 let mut is_end_of_file = false;
162
163 while i < lines.len() && !lines[i].starts_with("@@") {
164 let change_line = lines[i];
165
166 if change_line == "*** End of File" {
167 is_end_of_file = true;
168 i += 1;
169 break;
170 }
171 if change_line.starts_with("***") {
172 break;
173 }
174
175 if let Some(content) = change_line.strip_prefix(' ') {
176 old_lines.push(content.to_owned());
177 new_lines.push(content.to_owned());
178 } else if let Some(content) = change_line.strip_prefix('-') {
179 old_lines.push(content.to_owned());
180 } else if let Some(content) = change_line.strip_prefix('+') {
181 new_lines.push(content.to_owned());
182 }
183
184 i += 1;
185 }
186
187 chunks.push(UpdateFileChunk {
188 old_lines,
189 new_lines,
190 change_context: (!context_line.is_empty()).then(|| context_line.to_owned()),
191 is_end_of_file,
192 });
193 } else {
194 i += 1;
195 }
196 }
197
198 (chunks, i)
199}
200
201pub fn parse_patch(patch_text: &str) -> Result<Vec<Hunk>, String> {
206 if patch_text.len() > MAX_PATCH_SIZE {
207 return Err(format!(
208 "Patch too large: {} bytes exceeds limit of {} bytes",
209 patch_text.len(),
210 MAX_PATCH_SIZE
211 ));
212 }
213
214 let trimmed = patch_text.trim();
215 let cleaned = strip_heredoc(trimmed);
216 let lines: Vec<&str> = cleaned.split('\n').collect();
217 let mut hunks = Vec::new();
218
219 let begin_idx = lines
220 .iter()
221 .position(|line| line.trim() == "*** Begin Patch");
222 let end_idx = lines.iter().position(|line| line.trim() == "*** End Patch");
223
224 let (Some(begin_idx), Some(end_idx)) = (begin_idx, end_idx) else {
225 return Err(
226 "Invalid patch format: missing *** Begin Patch / *** End Patch markers".to_owned(),
227 );
228 };
229 if begin_idx >= end_idx {
230 return Err(
231 "Invalid patch format: missing *** Begin Patch / *** End Patch markers".to_owned(),
232 );
233 }
234
235 let mut i = begin_idx + 1;
236 while i < end_idx {
237 let Some(header) = parse_patch_header(&lines, i) else {
238 i += 1;
239 continue;
240 };
241
242 if hunks.len() >= MAX_HUNKS {
243 return Err(format!(
244 "Patch exceeds maximum of {} file operations",
245 MAX_HUNKS
246 ));
247 }
248
249 if lines[i].starts_with("*** Add File:") {
250 let (contents, next_idx) = parse_add_file_content(&lines, header.next_idx);
251 hunks.push(Hunk::Add {
252 path: header.file_path,
253 contents,
254 });
255 i = next_idx;
256 } else if lines[i].starts_with("*** Delete File:") {
257 hunks.push(Hunk::Delete {
258 path: header.file_path,
259 });
260 i = header.next_idx;
261 } else if lines[i].starts_with("*** Update File:") {
262 let (chunks, next_idx) = parse_update_file_chunks(&lines, header.next_idx);
263 hunks.push(Hunk::Update {
264 path: header.file_path,
265 move_path: header.move_path,
266 chunks,
267 });
268 i = next_idx;
269 } else {
270 i += 1;
271 }
272 }
273
274 Ok(hunks)
275}
276
// Unit tests for `parse_patch`, covering error paths, heredoc handling and each hunk kind.
// NOTE(review): the numeric suffixes in the test names presumably point at line ranges in
// an upstream parser test/source file — confirm against that file.
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that parsing `patch` fails with exactly the message `expected`.
    fn assert_parse_error(patch: &str, expected: &str) {
        assert_eq!(parse_patch(patch).unwrap_err(), expected);
    }

    // A patch without the begin/end markers is rejected.
    #[test]
    fn parse_patch_missing_markers_matches_patch_parser_test_4_9() {
        assert_parse_error(
            "*** Add File: hello.txt\n+hello",
            "Invalid patch format: missing *** Begin Patch / *** End Patch markers",
        );
    }

    // Markers with nothing between them parse to an empty hunk list.
    #[test]
    fn parse_patch_empty_body_matches_patch_parser_test_11_13() {
        assert_eq!(
            parse_patch("*** Begin Patch\n*** End Patch").unwrap(),
            vec![]
        );
    }

    // An add header with an empty path is not a valid header, so it and the `+` line
    // that follows are skipped.
    #[test]
    fn parse_patch_ignores_empty_add_header_matches_patch_parser_test_15_17() {
        assert_eq!(
            parse_patch("*** Begin Patch\n*** Add File:\n+hello\n*** End Patch").unwrap(),
            vec![]
        );
    }

    // Input one byte over `MAX_PATCH_SIZE` is rejected before any parsing happens.
    #[test]
    fn parse_patch_size_limit_matches_patch_parser_test_19_25() {
        let oversized_patch = "x".repeat(MAX_PATCH_SIZE + 1);
        assert_parse_error(
            &oversized_patch,
            "Patch too large: 1048577 bytes exceeds limit of 1048576 bytes",
        );
    }

    // `0..=MAX_HUNKS` yields MAX_HUNKS + 1 add operations, one more than allowed.
    #[test]
    fn parse_patch_hunk_limit_matches_patch_parser_test_27_38() {
        let mut patch = vec!["*** Begin Patch".to_owned()];
        for index in 0..=MAX_HUNKS {
            patch.push(format!("*** Add File: file-{index}.txt"));
            patch.push(format!("+line {index}"));
        }
        patch.push("*** End Patch".to_owned());

        assert_parse_error(
            &patch.join("\n"),
            "Patch exceeds maximum of 500 file operations",
        );
    }

    // A heredoc whose closing delimiter does not match the opener is left as-is by
    // `strip_heredoc`; the patch markers inside are still found. The same holds when the
    // heredoc opener is not at the start of the input.
    #[test]
    fn parse_patch_invalid_heredoc_matches_patch_parser_test_40_56() {
        let wrapped_patch = [
            "<<EOF",
            "*** Begin Patch",
            "*** Add File: hello.txt",
            "+hello world",
            "*** End Patch",
            "NOT_EOF",
        ]
        .join("\n");

        let expected = vec![Hunk::Add {
            path: "hello.txt".to_owned(),
            contents: "hello world".to_owned(),
        }];
        assert_eq!(parse_patch(&wrapped_patch).unwrap(), expected);
        assert_eq!(
            parse_patch(&format!("prefix\n{wrapped_patch}")).unwrap(),
            expected
        );
    }

    // A well-formed `cat <<'PATCH'` … `PATCH` wrapper around the whole input is accepted.
    #[test]
    fn strip_heredoc_accepts_whole_input_wrapper_from_patch_parser_source_33_36() {
        let wrapped_patch = [
            "cat <<'PATCH'",
            "*** Begin Patch",
            "*** Add File: hello.txt",
            "+hello world",
            "*** End Patch",
            "PATCH",
        ]
        .join("\n");

        assert_eq!(
            parse_patch(&wrapped_patch).unwrap(),
            vec![Hunk::Add {
                path: "hello.txt".to_owned(),
                contents: "hello world".to_owned(),
            }]
        );
    }

    // One patch exercising every hunk kind: add, delete, update with a context header,
    // and update with a move target and an end-of-file terminator.
    #[test]
    fn parse_patch_round_trips_add_delete_update_move_from_parser_source_38_141() {
        let patch = [
            "*** Begin Patch",
            "*** Add File: src/new.txt",
            "+hello",
            "+world",
            "*** Delete File: src/old.txt",
            "*** Update File: src/edit.txt",
            "@@ function demo()",
            " const keep = true;",
            "-const value = 1;",
            "+const value = 2;",
            "*** Update File: src/from.txt",
            "*** Move to: src/to.txt",
            "@@",
            "-old",
            "+new",
            "*** End of File",
            "*** End Patch",
        ]
        .join("\n");

        assert_eq!(
            parse_patch(&patch).unwrap(),
            vec![
                Hunk::Add {
                    path: "src/new.txt".to_owned(),
                    contents: "hello\nworld".to_owned(),
                },
                Hunk::Delete {
                    path: "src/old.txt".to_owned(),
                },
                Hunk::Update {
                    path: "src/edit.txt".to_owned(),
                    move_path: None,
                    chunks: vec![UpdateFileChunk {
                        old_lines: vec![
                            "const keep = true;".to_owned(),
                            "const value = 1;".to_owned()
                        ],
                        new_lines: vec![
                            "const keep = true;".to_owned(),
                            "const value = 2;".to_owned()
                        ],
                        change_context: Some("function demo()".to_owned()),
                        is_end_of_file: false,
                    }],
                },
                Hunk::Update {
                    path: "src/from.txt".to_owned(),
                    move_path: Some("src/to.txt".to_owned()),
                    chunks: vec![UpdateFileChunk {
                        old_lines: vec!["old".to_owned()],
                        new_lines: vec!["new".to_owned()],
                        change_context: None,
                        is_end_of_file: true,
                    }],
                },
            ]
        );
    }

    // Two `@@` headers under one update header produce two chunks in a single hunk.
    #[test]
    fn parse_patch_supports_multiple_chunks_in_one_update_from_parser_source_91_141() {
        let patch = [
            "*** Begin Patch",
            "*** Update File: src/multi.txt",
            "@@ first",
            "-one",
            "+two",
            "@@ second",
            " three",
            "-four",
            "+five",
            "*** End Patch",
        ]
        .join("\n");

        assert_eq!(
            parse_patch(&patch).unwrap(),
            vec![Hunk::Update {
                path: "src/multi.txt".to_owned(),
                move_path: None,
                chunks: vec![
                    UpdateFileChunk {
                        old_lines: vec!["one".to_owned()],
                        new_lines: vec!["two".to_owned()],
                        change_context: Some("first".to_owned()),
                        is_end_of_file: false,
                    },
                    UpdateFileChunk {
                        old_lines: vec!["three".to_owned(), "four".to_owned()],
                        new_lines: vec!["three".to_owned(), "five".to_owned()],
                        change_context: Some("second".to_owned()),
                        is_end_of_file: false,
                    },
                ],
            }]
        );
    }
}