// aft/patch/parser.rs

//! Parser for the opencode `*** Begin Patch` envelope.
//!
//! This is a port of the pure parsing half of
//! `packages/opencode-plugin/src/patch-parser.ts`.
4
5use regex::Regex;
6use std::sync::OnceLock;
7
/// Upper bound, in bytes, on the patch text accepted by the parser (1 MiB).
///
/// Larger inputs are rejected up front to prevent memory exhaustion.
pub const MAX_PATCH_SIZE: usize = 1 << 20;

/// Upper bound on the number of file operations a single patch may contain.
pub const MAX_HUNKS: usize = 500;
12
13#[derive(Debug, Clone, PartialEq, Eq)]
14pub enum Hunk {
15    Add {
16        path: String,
17        contents: String,
18    },
19    Delete {
20        path: String,
21    },
22    Update {
23        path: String,
24        move_path: Option<String>,
25        chunks: Vec<UpdateFileChunk>,
26    },
27}
28
/// A single `@@` chunk belonging to an update section.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct UpdateFileChunk {
    /// Context (` `) and removed (`-`) lines, prefix stripped, in patch order.
    pub old_lines: Vec<String>,
    /// Context (` `) and added (`+`) lines, prefix stripped, in patch order.
    pub new_lines: Vec<String>,
    /// Trimmed text following `@@` on the chunk header; `None` when there is none.
    pub change_context: Option<String>,
    /// Whether the chunk was closed by a `*** End of File` marker.
    pub is_end_of_file: bool,
}
36
/// Result of reading one file-operation header.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PatchHeader {
    /// Trimmed path from the `Add`/`Delete`/`Update File:` line.
    pub file_path: String,
    /// Trimmed destination from a `*** Move to:` line, when one follows an update header.
    pub move_path: Option<String>,
    /// Index of the first line after the header (and after the move line, if consumed).
    pub next_idx: usize,
}
43
44/// Strip a whole-input heredoc wrapper; mirrors `patch-parser.ts:33-36`.
45///
46/// The TypeScript regex uses a backreference for the closing delimiter. Rust's
47/// `regex` crate intentionally omits backreferences, so this uses `regex` for
48/// the opening heredoc syntax and then verifies the matching closing delimiter
49/// manually to preserve the same anchored wrapper behavior.
50pub fn strip_heredoc(input: &str) -> String {
51    static OPEN_RE: OnceLock<Regex> = OnceLock::new();
52    let open_re = OPEN_RE.get_or_init(|| {
53        Regex::new(r#"^(?:cat\s+)?<<['"]?([A-Za-z0-9_]+)['"]?\s*\n"#)
54            .expect("heredoc opening regex should compile")
55    });
56
57    let Some(captures) = open_re.captures(input) else {
58        return input.to_owned();
59    };
60    let Some(opening) = captures.get(0) else {
61        return input.to_owned();
62    };
63    let delimiter = captures
64        .get(1)
65        .expect("heredoc regex has a delimiter capture")
66        .as_str();
67    let rest = &input[opening.end()..];
68
69    for (offset, _) in rest.match_indices('\n') {
70        let after_newline = &rest[offset + 1..];
71        let Some(after_delimiter) = after_newline.strip_prefix(delimiter) else {
72            continue;
73        };
74        if after_delimiter.chars().all(char::is_whitespace) {
75            return rest[..offset].to_owned();
76        }
77    }
78
79    input.to_owned()
80}
81
82/// Parse a file-operation header line and return its path, optional move destination, and next line index.
83pub fn parse_patch_header(lines: &[&str], start_idx: usize) -> Option<PatchHeader> {
84    let line = *lines.get(start_idx)?;
85
86    if let Some(path) = line.strip_prefix("*** Add File:") {
87        let file_path = path.trim();
88        return (!file_path.is_empty()).then(|| PatchHeader {
89            file_path: file_path.to_owned(),
90            move_path: None,
91            next_idx: start_idx + 1,
92        });
93    }
94
95    if let Some(path) = line.strip_prefix("*** Delete File:") {
96        let file_path = path.trim();
97        return (!file_path.is_empty()).then(|| PatchHeader {
98            file_path: file_path.to_owned(),
99            move_path: None,
100            next_idx: start_idx + 1,
101        });
102    }
103
104    if let Some(path) = line.strip_prefix("*** Update File:") {
105        let file_path = path.trim();
106        if file_path.is_empty() {
107            return None;
108        }
109
110        let mut move_path = None;
111        let mut next_idx = start_idx + 1;
112        if let Some(next_line) = lines.get(next_idx) {
113            if let Some(path) = next_line.strip_prefix("*** Move to:") {
114                move_path = Some(path.trim().to_owned());
115                next_idx += 1;
116            }
117        }
118
119        return Some(PatchHeader {
120            file_path: file_path.to_owned(),
121            move_path,
122            next_idx,
123        });
124    }
125
126    None
127}
128
/// Collect the body of an added file starting at `lines[start_idx]`.
///
/// Lines are consumed up to, but not including, the first line that starts
/// with `***`, or to the end of `lines`. Within that run every line beginning
/// with `+` contributes its remainder; other lines are skipped. The collected
/// lines are joined with `\n` and no trailing newline is added.
///
/// Returns the joined text and the index at which scanning stopped. When
/// `start_idx` is at or past the end of `lines`, the text is empty and the
/// index is `start_idx` itself.
pub fn parse_add_file_content(lines: &[&str], start_idx: usize) -> (String, usize) {
    let tail = lines.get(start_idx..).unwrap_or_default();
    let consumed = tail
        .iter()
        .take_while(|line| !line.starts_with("***"))
        .count();

    let added: Vec<&str> = tail[..consumed]
        .iter()
        .filter_map(|line| line.strip_prefix('+'))
        .collect();

    (added.join("\n"), start_idx + consumed)
}
148
149/// Parse `@@` update chunks into old/new line vectors and optional context anchors.
150pub fn parse_update_file_chunks(lines: &[&str], start_idx: usize) -> (Vec<UpdateFileChunk>, usize) {
151    let mut chunks = Vec::new();
152    let mut i = start_idx;
153
154    while i < lines.len() && !lines[i].starts_with("***") {
155        if lines[i].starts_with("@@") {
156            let context_line = lines[i]["@@".len()..].trim();
157            i += 1;
158
159            let mut old_lines = Vec::new();
160            let mut new_lines = Vec::new();
161            let mut is_end_of_file = false;
162
163            while i < lines.len() && !lines[i].starts_with("@@") {
164                let change_line = lines[i];
165
166                if change_line == "*** End of File" {
167                    is_end_of_file = true;
168                    i += 1;
169                    break;
170                }
171                if change_line.starts_with("***") {
172                    break;
173                }
174
175                if let Some(content) = change_line.strip_prefix(' ') {
176                    old_lines.push(content.to_owned());
177                    new_lines.push(content.to_owned());
178                } else if let Some(content) = change_line.strip_prefix('-') {
179                    old_lines.push(content.to_owned());
180                } else if let Some(content) = change_line.strip_prefix('+') {
181                    new_lines.push(content.to_owned());
182                }
183
184                i += 1;
185            }
186
187            chunks.push(UpdateFileChunk {
188                old_lines,
189                new_lines,
190                change_context: (!context_line.is_empty()).then(|| context_line.to_owned()),
191                is_end_of_file,
192            });
193        } else {
194            i += 1;
195        }
196    }
197
198    (chunks, i)
199}
200
201/// Parse an opencode apply_patch envelope; mirrors `patch-parser.ts:148-201`.
202///
203/// The size guard uses `patch_text.len()` bytes. TypeScript uses string length,
204/// but the port intentionally guards bytes so Rust bounds actual allocation size.
205pub fn parse_patch(patch_text: &str) -> Result<Vec<Hunk>, String> {
206    if patch_text.len() > MAX_PATCH_SIZE {
207        return Err(format!(
208            "Patch too large: {} bytes exceeds limit of {} bytes",
209            patch_text.len(),
210            MAX_PATCH_SIZE
211        ));
212    }
213
214    let trimmed = patch_text.trim();
215    let cleaned = strip_heredoc(trimmed);
216    let lines: Vec<&str> = cleaned.split('\n').collect();
217    let mut hunks = Vec::new();
218
219    let begin_idx = lines
220        .iter()
221        .position(|line| line.trim() == "*** Begin Patch");
222    let end_idx = lines.iter().position(|line| line.trim() == "*** End Patch");
223
224    let (Some(begin_idx), Some(end_idx)) = (begin_idx, end_idx) else {
225        return Err(
226            "Invalid patch format: missing *** Begin Patch / *** End Patch markers".to_owned(),
227        );
228    };
229    if begin_idx >= end_idx {
230        return Err(
231            "Invalid patch format: missing *** Begin Patch / *** End Patch markers".to_owned(),
232        );
233    }
234
235    let mut i = begin_idx + 1;
236    while i < end_idx {
237        let Some(header) = parse_patch_header(&lines, i) else {
238            i += 1;
239            continue;
240        };
241
242        if hunks.len() >= MAX_HUNKS {
243            return Err(format!(
244                "Patch exceeds maximum of {} file operations",
245                MAX_HUNKS
246            ));
247        }
248
249        if lines[i].starts_with("*** Add File:") {
250            let (contents, next_idx) = parse_add_file_content(&lines, header.next_idx);
251            hunks.push(Hunk::Add {
252                path: header.file_path,
253                contents,
254            });
255            i = next_idx;
256        } else if lines[i].starts_with("*** Delete File:") {
257            hunks.push(Hunk::Delete {
258                path: header.file_path,
259            });
260            i = header.next_idx;
261        } else if lines[i].starts_with("*** Update File:") {
262            let (chunks, next_idx) = parse_update_file_chunks(&lines, header.next_idx);
263            hunks.push(Hunk::Update {
264                path: header.file_path,
265                move_path: header.move_path,
266                chunks,
267            });
268            i = next_idx;
269        } else {
270            i += 1;
271        }
272    }
273
274    Ok(hunks)
275}
276
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that parsing `patch` fails with exactly `expected`.
    fn assert_parse_error(patch: &str, expected: &str) {
        let message = parse_patch(patch).unwrap_err();
        assert_eq!(message, expected);
    }

    /// Joins patch lines with `\n`, the way the fixtures are written.
    fn script(lines: &[&str]) -> String {
        lines.join("\n")
    }

    /// Converts borrowed line literals into owned strings.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_owned()).collect()
    }

    /// Builds an expected `Hunk::Add`.
    fn add(path: &str, contents: &str) -> Hunk {
        Hunk::Add {
            path: path.to_owned(),
            contents: contents.to_owned(),
        }
    }

    /// Builds an expected update chunk.
    fn chunk(
        old: &[&str],
        new: &[&str],
        context: Option<&str>,
        is_end_of_file: bool,
    ) -> UpdateFileChunk {
        UpdateFileChunk {
            old_lines: owned(old),
            new_lines: owned(new),
            change_context: context.map(str::to_owned),
            is_end_of_file,
        }
    }

    #[test]
    fn parse_patch_missing_markers_matches_patch_parser_test_4_9() {
        assert_parse_error(
            "*** Add File: hello.txt\n+hello",
            "Invalid patch format: missing *** Begin Patch / *** End Patch markers",
        );
    }

    #[test]
    fn parse_patch_empty_body_matches_patch_parser_test_11_13() {
        let hunks = parse_patch("*** Begin Patch\n*** End Patch").unwrap();
        assert!(hunks.is_empty());
    }

    #[test]
    fn parse_patch_ignores_empty_add_header_matches_patch_parser_test_15_17() {
        let hunks = parse_patch("*** Begin Patch\n*** Add File:\n+hello\n*** End Patch").unwrap();
        assert!(hunks.is_empty());
    }

    #[test]
    fn parse_patch_size_limit_matches_patch_parser_test_19_25() {
        // One byte over the limit is enough to trip the guard.
        let oversized_patch = "x".repeat(MAX_PATCH_SIZE + 1);
        assert_parse_error(
            &oversized_patch,
            "Patch too large: 1048577 bytes exceeds limit of 1048576 bytes",
        );
    }

    #[test]
    fn parse_patch_hunk_limit_matches_patch_parser_test_27_38() {
        // `0..=MAX_HUNKS` yields one add-file section more than is allowed.
        let sections = (0..=MAX_HUNKS).flat_map(|index| {
            vec![
                format!("*** Add File: file-{index}.txt"),
                format!("+line {index}"),
            ]
        });
        let patch: Vec<String> = std::iter::once("*** Begin Patch".to_owned())
            .chain(sections)
            .chain(std::iter::once("*** End Patch".to_owned()))
            .collect();

        assert_parse_error(
            &patch.join("\n"),
            "Patch exceeds maximum of 500 file operations",
        );
    }

    #[test]
    fn parse_patch_invalid_heredoc_matches_patch_parser_test_40_56() {
        // The closing line does not repeat the opening tag, so nothing is unwrapped.
        let wrapped_patch = script(&[
            "<<EOF",
            "*** Begin Patch",
            "*** Add File: hello.txt",
            "+hello world",
            "*** End Patch",
            "NOT_EOF",
        ]);
        let expected = vec![add("hello.txt", "hello world")];

        assert_eq!(parse_patch(&wrapped_patch).unwrap(), expected);

        let prefixed = format!("prefix\n{wrapped_patch}");
        assert_eq!(parse_patch(&prefixed).unwrap(), expected);
    }

    #[test]
    fn strip_heredoc_accepts_whole_input_wrapper_from_patch_parser_source_33_36() {
        let wrapped_patch = script(&[
            "cat <<'PATCH'",
            "*** Begin Patch",
            "*** Add File: hello.txt",
            "+hello world",
            "*** End Patch",
            "PATCH",
        ]);

        assert_eq!(
            parse_patch(&wrapped_patch).unwrap(),
            vec![add("hello.txt", "hello world")]
        );
    }

    #[test]
    fn parse_patch_round_trips_add_delete_update_move_from_parser_source_38_141() {
        let patch = script(&[
            "*** Begin Patch",
            "*** Add File: src/new.txt",
            "+hello",
            "+world",
            "*** Delete File: src/old.txt",
            "*** Update File: src/edit.txt",
            "@@ function demo()",
            " const keep = true;",
            "-const value = 1;",
            "+const value = 2;",
            "*** Update File: src/from.txt",
            "*** Move to: src/to.txt",
            "@@",
            "-old",
            "+new",
            "*** End of File",
            "*** End Patch",
        ]);

        let expected = vec![
            add("src/new.txt", "hello\nworld"),
            Hunk::Delete {
                path: "src/old.txt".to_owned(),
            },
            Hunk::Update {
                path: "src/edit.txt".to_owned(),
                move_path: None,
                chunks: vec![chunk(
                    &["const keep = true;", "const value = 1;"],
                    &["const keep = true;", "const value = 2;"],
                    Some("function demo()"),
                    false,
                )],
            },
            Hunk::Update {
                path: "src/from.txt".to_owned(),
                move_path: Some("src/to.txt".to_owned()),
                chunks: vec![chunk(&["old"], &["new"], None, true)],
            },
        ];

        assert_eq!(parse_patch(&patch).unwrap(), expected);
    }

    #[test]
    fn parse_patch_supports_multiple_chunks_in_one_update_from_parser_source_91_141() {
        let patch = script(&[
            "*** Begin Patch",
            "*** Update File: src/multi.txt",
            "@@ first",
            "-one",
            "+two",
            "@@ second",
            " three",
            "-four",
            "+five",
            "*** End Patch",
        ]);

        let expected = vec![Hunk::Update {
            path: "src/multi.txt".to_owned(),
            move_path: None,
            chunks: vec![
                chunk(&["one"], &["two"], Some("first"), false),
                chunk(&["three", "four"], &["three", "five"], Some("second"), false),
            ],
        }];

        assert_eq!(parse_patch(&patch).unwrap(), expected);
    }
}