Skip to main content

xchecker_engine/fixup/
parse.rs

1use std::path::PathBuf;
2
3use regex::Regex;
4
5use crate::error::FixupError;
6use crate::paths::{SandboxConfig, SandboxError, SandboxPath, SandboxRoot};
7
8use super::model::{DiffHunk, FixupMode, UnifiedDiff};
9
10/// Main fixup parser that handles detection and parsing of fixup plans
11///
12/// # Security
13///
14/// `FixupParser` uses `SandboxRoot` to validate all target paths before any file operations.
15/// This ensures that:
16/// - Paths cannot escape the workspace root via `..` traversal
17/// - Absolute paths are rejected
18/// - Symlinks are rejected by default (configurable via `SandboxConfig`)
19/// - Hardlinks are rejected by default (configurable via `SandboxConfig`)
20///
21/// All path validation happens through `SandboxRoot::join()` which provides
22/// comprehensive security checks before any file I/O.
23pub struct FixupParser {
24    /// Operating mode (preview or apply)
25    pub mode: FixupMode,
26    /// Sandboxed root directory for resolving and validating relative paths
27    pub sandbox_root: SandboxRoot,
28}
29
30impl FixupParser {
31    /// Create a new fixup parser with a sandboxed root directory.
32    ///
33    /// # Arguments
34    ///
35    /// * `mode` - The operating mode (preview or apply)
36    /// * `base_dir` - The base directory to use as a sandbox root
37    ///
38    /// # Errors
39    ///
40    /// Returns an error if base directory cannot be used as a sandbox root
41    /// (e.g., doesn't exist, isn't a directory, or can't be canonicalized).
42    pub fn new(mode: FixupMode, base_dir: PathBuf) -> Result<Self, FixupError> {
43        let sandbox_root = SandboxRoot::new(&base_dir, SandboxConfig::default()).map_err(|e| {
44            FixupError::CanonicalizationError(format!("Failed to create sandbox root: {e}"))
45        })?;
46        Ok(Self { mode, sandbox_root })
47    }
48
49    /// Create a new fixup parser with custom sandbox configuration.
50    ///
51    /// # Arguments
52    ///
53    /// * `mode` - The operating mode (preview or apply)
54    /// * `base_dir` - The base directory to use as a sandbox root
55    /// * `config` - Custom sandbox configuration (e.g., to allow symlinks)
56    ///
57    /// # Errors
58    ///
59    /// Returns an error if base directory cannot be used as a sandbox root.
60    pub fn with_config(
61        mode: FixupMode,
62        base_dir: PathBuf,
63        config: SandboxConfig,
64    ) -> Result<Self, FixupError> {
65        let sandbox_root = SandboxRoot::new(&base_dir, config).map_err(|e| {
66            FixupError::CanonicalizationError(format!("Failed to create sandbox root: {e}"))
67        })?;
68        Ok(Self { mode, sandbox_root })
69    }
70
71    /// Get the sandbox root path.
72    #[must_use]
73    pub fn base_dir(&self) -> &std::path::Path {
74        self.sandbox_root.as_path()
75    }
76
77    /// Validate and resolve a target path within the sandbox.
78    ///
79    /// This method uses `SandboxRoot::join()` to validate the path, ensuring:
80    /// - The path is relative (not absolute)
81    /// - The path doesn't contain `..` traversal components
82    /// - The path doesn't escape the sandbox root
83    /// - The path isn't a symlink (unless configured to allow)
84    /// - The path isn't a hardlink (unless configured to allow)
85    ///
86    /// # Arguments
87    ///
88    /// * `target_file` - The relative path to validate
89    ///
90    /// # Returns
91    ///
92    /// A `SandboxPath` that is guaranteed to be within the sandbox root.
93    pub(super) fn validate_target_path(
94        &self,
95        target_file: &str,
96    ) -> Result<SandboxPath, FixupError> {
97        self.sandbox_root.join(target_file).map_err(|e| match e {
98            SandboxError::AbsolutePath { path } => FixupError::AbsolutePath(PathBuf::from(path)),
99            SandboxError::ParentTraversal { path } => {
100                FixupError::ParentDirEscape(PathBuf::from(path))
101            }
102            SandboxError::EscapeAttempt { path, .. } => {
103                FixupError::OutsideRepo(PathBuf::from(path))
104            }
105            SandboxError::SymlinkNotAllowed { path } => {
106                FixupError::SymlinkNotAllowed(PathBuf::from(path))
107            }
108            SandboxError::HardlinkNotAllowed { path } => {
109                FixupError::HardlinkNotAllowed(PathBuf::from(path))
110            }
111            SandboxError::RootNotFound { path } | SandboxError::RootNotDirectory { path } => {
112                FixupError::CanonicalizationError(format!("Invalid sandbox root: {path}"))
113            }
114            SandboxError::RootCanonicalizationFailed { path, reason } => {
115                FixupError::CanonicalizationError(format!(
116                    "Failed to canonicalize {path}: {reason}"
117                ))
118            }
119            SandboxError::PathCanonicalizationFailed { path, reason } => {
120                FixupError::CanonicalizationError(format!(
121                    "Failed to canonicalize {path}: {reason}"
122                ))
123            }
124        })
125    }
126
127    /// Detect if review output contains fixup markers.
128    #[must_use]
129    pub fn has_fixup_markers(&self, content: &str) -> bool {
130        self.detect_fixup_markers(content).is_some()
131    }
132
133    /// Detect fixup markers in review output.
134    /// Returns the content after the first marker if found.
135    #[must_use]
136    pub fn detect_fixup_markers(&self, content: &str) -> Option<String> {
137        // Look for "FIXUP PLAN:" or "needs fixups" markers
138        let fixup_plan_regex = Regex::new(r"(?i)FIXUP PLAN:").unwrap();
139        let needs_fixups_regex = Regex::new(r"(?i)needs fixups").unwrap();
140
141        if let Some(mat) = fixup_plan_regex.find(content) {
142            return Some(content[mat.end()..].to_string());
143        }
144
145        if let Some(mat) = needs_fixups_regex.find(content) {
146            return Some(content[mat.end()..].to_string());
147        }
148
149        None
150    }
151
152    /// Parse unified diff blocks from fixup content.
153    pub fn parse_diffs(&self, content: &str) -> Result<Vec<UnifiedDiff>, FixupError> {
154        let fixup_content = self
155            .detect_fixup_markers(content)
156            .ok_or(FixupError::NoFixupMarkersFound)?;
157
158        let diffs = self.extract_diff_blocks(&fixup_content)?;
159
160        if diffs.is_empty() {
161            return Err(FixupError::NoValidDiffBlocks);
162        }
163
164        Ok(diffs)
165    }
166
167    /// Extract diff blocks from fenced code blocks.
168    fn extract_diff_blocks(&self, content: &str) -> Result<Vec<UnifiedDiff>, FixupError> {
169        let mut diffs = Vec::new();
170
171        // Regex to match fenced diff blocks: ```diff ... ```
172        // Use (?s) flag to make . match newlines
173        let diff_block_regex = Regex::new(r"(?s)```diff\n(.*?)\n```").unwrap();
174
175        for (block_index, captures) in diff_block_regex.captures_iter(content).enumerate() {
176            let diff_content = captures
177                .get(1)
178                .ok_or_else(|| FixupError::InvalidDiffFormat {
179                    block_index,
180                    reason: "No diff content found in block".to_string(),
181                })?
182                .as_str();
183
184            match self.parse_unified_diff(diff_content, block_index) {
185                Ok(diff) => diffs.push(diff),
186                Err(e) => {
187                    // Log error but continue processing other blocks
188                    tracing::warn!("Failed to parse diff block {block_index}: {e}");
189                }
190            }
191        }
192
193        Ok(diffs)
194    }
195
196    /// Parse a single unified diff block.
197    fn parse_unified_diff(
198        &self,
199        diff_content: &str,
200        block_index: usize,
201    ) -> Result<UnifiedDiff, FixupError> {
202        let lines: Vec<&str> = diff_content.lines().collect();
203
204        if lines.is_empty() {
205            return Err(FixupError::InvalidDiffFormat {
206                block_index,
207                reason: "Empty diff block".to_string(),
208            });
209        }
210
211        // Find --- and +++ headers
212        let mut old_file = None;
213        let mut new_file = None;
214        let mut header_end = 0;
215
216        for (i, line) in lines.iter().enumerate() {
217            if let Some(rest) = line.strip_prefix("--- ") {
218                old_file = Some(rest.trim());
219            } else if let Some(rest) = line.strip_prefix("+++ ") {
220                new_file = Some(rest.trim());
221                header_end = i + 1;
222                break;
223            }
224        }
225
226        let target_file = new_file
227            .or(old_file)
228            .ok_or_else(|| FixupError::InvalidDiffFormat {
229                block_index,
230                reason: "No --- or +++ headers found".to_string(),
231            })?;
232
233        // Remove a/ and b/ prefixes if present (common in git diffs)
234        let target_file = if target_file.starts_with("a/") || target_file.starts_with("b/") {
235            &target_file[2..]
236        } else {
237            target_file
238        };
239
240        // Parse hunks
241        let hunks = self.parse_hunks(&lines[header_end..], block_index)?;
242
243        Ok(UnifiedDiff {
244            path: target_file.to_string(),
245            target_file: target_file.to_string(),
246            diff_content: diff_content.to_string(),
247            hunks,
248        })
249    }
250
251    /// Parse hunks from diff lines.
252    fn parse_hunks(&self, lines: &[&str], block_index: usize) -> Result<Vec<DiffHunk>, FixupError> {
253        let mut hunks = Vec::new();
254        let mut current_hunk_lines: Vec<String> = Vec::new();
255        let mut current_hunk_header: Option<((usize, usize), (usize, usize))> = None;
256
257        // Regex to match hunk headers: @@ -old_start,old_count +new_start,new_count @@
258        // Note: Optional count groups must come after their respective start numbers
259        let hunk_header_regex = Regex::new(r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@").unwrap();
260
261        for line in lines {
262            if let Some(captures) = hunk_header_regex.captures(line) {
263                // Save previous hunk if exists
264                if let Some((old_range, new_range)) = current_hunk_header {
265                    hunks.push(DiffHunk {
266                        start: old_range.0,
267                        remove_count: old_range.1,
268                        add_count: new_range.1,
269                        remove_lines: current_hunk_lines
270                            .iter()
271                            .filter(|line| line.starts_with('-'))
272                            .map(|line| line[1..].to_string())
273                            .collect(),
274                        add_lines: current_hunk_lines
275                            .iter()
276                            .filter(|line| line.starts_with('+'))
277                            .map(|line| line[1..].to_string())
278                            .collect(),
279                        old_range,
280                        new_range,
281                        content: current_hunk_lines.join("\n"),
282                    });
283                }
284
285                // Parse new hunk header
286                let old_start: usize = captures.get(1).unwrap().as_str().parse().map_err(|_| {
287                    FixupError::InvalidDiffFormat {
288                        block_index,
289                        reason: "Invalid old start line number".to_string(),
290                    }
291                })?;
292
293                let old_count: usize = captures
294                    .get(2)
295                    .map_or(1, |m| m.as_str().parse().unwrap_or(1));
296
297                let new_start: usize = captures.get(3).unwrap().as_str().parse().map_err(|_| {
298                    FixupError::InvalidDiffFormat {
299                        block_index,
300                        reason: "Invalid new start line number".to_string(),
301                    }
302                })?;
303
304                let new_count: usize = captures
305                    .get(4)
306                    .map_or(1, |m| m.as_str().parse().unwrap_or(1));
307
308                current_hunk_header = Some(((old_start, old_count), (new_start, new_count)));
309                current_hunk_lines = vec![(*line).to_string()];
310            } else {
311                // Add line to current hunk
312                current_hunk_lines.push((*line).to_string());
313            }
314        }
315
316        // Save last hunk if exists
317        if let Some((old_range, new_range)) = current_hunk_header {
318            hunks.push(DiffHunk {
319                start: old_range.0,
320                remove_count: old_range.1,
321                add_count: new_range.1,
322                remove_lines: current_hunk_lines
323                    .iter()
324                    .filter(|line| line.starts_with('-'))
325                    .map(|line| line[1..].to_string())
326                    .collect(),
327                add_lines: current_hunk_lines
328                    .iter()
329                    .filter(|line| line.starts_with('+'))
330                    .map(|line| line[1..].to_string())
331                    .collect(),
332                old_range,
333                new_range,
334                content: current_hunk_lines.join("\n"),
335            });
336        }
337
338        Ok(hunks)
339    }
340}
341
342#[cfg(test)]
343mod tests {
344    use super::*;
345    use tempfile::TempDir;
346
347    #[test]
348    fn test_detect_fixup_markers() {
349        let temp_dir = TempDir::new().unwrap();
350        let parser = FixupParser::new(FixupMode::Preview, temp_dir.path().to_path_buf()).unwrap();
351
352        // Test FIXUP PLAN: marker
353        let content1 = "Some review content\nFIXUP PLAN:\nHere are the fixes needed...";
354        assert!(parser.has_fixup_markers(content1));
355
356        // Test needs fixups marker
357        let content2 = "The review shows that this needs fixups in several areas...";
358        assert!(parser.has_fixup_markers(content2));
359
360        // Test no markers
361        let content3 = "This is a clean review with no issues found.";
362        assert!(!parser.has_fixup_markers(content3));
363    }
364
365    #[test]
366    fn test_parse_simple_diff() {
367        let temp_dir = TempDir::new().unwrap();
368        let parser = FixupParser::new(FixupMode::Preview, temp_dir.path().to_path_buf()).unwrap();
369
370        let content = r#"
371FIXUP PLAN:
372The following changes are needed:
373
374```diff
375--- a/src/main.rs
376+++ b/src/main.rs
377@@ -1,3 +1,4 @@
378fn main() {
379+    println!("Hello, world!");
380     // TODO: implement
381}
382```
383"#;
384
385        let diffs = parser.parse_diffs(content).unwrap();
386        assert_eq!(diffs.len(), 1);
387        assert_eq!(diffs[0].target_file, "src/main.rs");
388        assert_eq!(diffs[0].hunks.len(), 1);
389    }
390
391    #[test]
392    fn test_parse_multiple_hunks() {
393        let temp_dir = TempDir::new().unwrap();
394        let parser = FixupParser::new(FixupMode::Preview, temp_dir.path().to_path_buf()).unwrap();
395
396        let content = r#"
397FIXUP PLAN:
398Multiple changes needed:
399
400```diff
401--- a/src/lib.rs
402+++ b/src/lib.rs
403@@ -1,3 +1,4 @@
404pub fn foo() {
405+    println!("Starting foo");
406     // implementation
407}
408@@ -10,2 +11,3 @@
409pub fn bar() {
410+    println!("Starting bar");
411     // implementation
412}
413```
414"#;
415
416        let diffs = parser.parse_diffs(content).unwrap();
417        assert_eq!(diffs.len(), 1);
418        assert_eq!(diffs[0].target_file, "src/lib.rs");
419        assert_eq!(diffs[0].hunks.len(), 2);
420
421        // Verify first hunk
422        let hunk1 = &diffs[0].hunks[0];
423        assert_eq!(hunk1.start, 1);
424        assert_eq!(hunk1.remove_count, 3);
425        assert_eq!(hunk1.add_count, 4);
426
427        // Verify second hunk
428        let hunk2 = &diffs[0].hunks[1];
429        assert_eq!(hunk2.start, 10);
430        assert_eq!(hunk2.remove_count, 2);
431        assert_eq!(hunk2.add_count, 3);
432
433        // Verify second hunk
434        let hunk2 = &diffs[0].hunks[1];
435        assert_eq!(hunk2.start, 10);
436        assert_eq!(hunk2.remove_count, 2);
437        assert_eq!(hunk2.add_count, 3);
438    }
439
440    #[test]
441    fn test_parse_multiple_diffs() {
442        let temp_dir = TempDir::new().unwrap();
443        let parser = FixupParser::new(FixupMode::Preview, temp_dir.path().to_path_buf()).unwrap();
444
445        let content = r#"
446FIXUP PLAN:
447Changes needed in multiple files:
448
449```diff
450--- a/src/main.rs
451+++ b/src/main.rs
452@@ -1,2 +1,3 @@
453fn main() {
454+    println!("Hello");
455}
456```
457
458```diff
459--- a/src/lib.rs
460+++ b/src/lib.rs
461@@ -1,2 +1,3 @@
462pub fn test() {
463+    println!("Test");
464}
465```
466"#;
467
468        let diffs = parser.parse_diffs(content).unwrap();
469        assert_eq!(diffs.len(), 2);
470        assert_eq!(diffs[0].target_file, "src/main.rs");
471        assert_eq!(diffs[0].hunks.len(), 1);
472        assert_eq!(diffs[1].target_file, "src/lib.rs");
473        assert_eq!(diffs[1].hunks.len(), 1);
474    }
475
476    #[test]
477    fn test_parse_diff_without_git_prefix() {
478        let temp_dir = TempDir::new().unwrap();
479        let parser = FixupParser::new(FixupMode::Preview, temp_dir.path().to_path_buf()).unwrap();
480
481        let content = r#"
482FIXUP PLAN:
483
484```diff
485--- src/main.rs
486+++ src/main.rs
487@@ -1,2 +1,3 @@
488fn main() {
489+    println!("Hello");
490}
491```
492"#;
493
494        let diffs = parser.parse_diffs(content).unwrap();
495        assert_eq!(diffs.len(), 1);
496        assert_eq!(diffs[0].target_file, "src/main.rs");
497        assert_eq!(diffs[0].hunks.len(), 1);
498    }
499
500    #[test]
501    fn test_hunk_range_parsing() {
502        let temp_dir = TempDir::new().unwrap();
503        let parser = FixupParser::new(FixupMode::Preview, temp_dir.path().to_path_buf()).unwrap();
504
505        let content = r#"
506FIXUP PLAN:
507
508```diff
509--- a/test.txt
510+++ b/test.txt
511@@ -5,3 +5,4 @@
512line 5
513+new line
514line 6
515line 7
516@@ -10 +11,2 @@
517line 10
518+another new line
519```
520"#;
521
522        let diffs = parser.parse_diffs(content).unwrap();
523        assert_eq!(diffs.len(), 1);
524        assert_eq!(diffs[0].hunks.len(), 2);
525
526        // First hunk: @@ -5,3 +5,4 @@
527        let hunk1 = &diffs[0].hunks[0];
528        assert_eq!(hunk1.old_range, (5, 3));
529        assert_eq!(hunk1.new_range, (5, 4));
530
531        // Second hunk: @@ -10 +11,2 @@ (implicit count of 1)
532        let hunk2 = &diffs[0].hunks[1];
533        assert_eq!(hunk2.old_range, (10, 1));
534        assert_eq!(hunk2.new_range, (11, 2));
535
536        // Verify first hunk - start should match old_range.0
537        let hunk1 = &diffs[0].hunks[0];
538        assert_eq!(hunk1.start, 5); // old_range.0
539        assert_eq!(hunk1.remove_count, 3);
540        assert_eq!(hunk1.add_count, 4);
541
542        // Verify second hunk
543        let hunk2 = &diffs[0].hunks[1];
544        assert_eq!(hunk2.old_range, (10, 1));
545        assert_eq!(hunk2.remove_count, 1); // old_range.1
546        assert_eq!(hunk2.new_range, (11, 2));
547    }
548
549    #[test]
550    fn test_empty_diff_block() {
551        let temp_dir = TempDir::new().unwrap();
552        let parser = FixupParser::new(FixupMode::Preview, temp_dir.path().to_path_buf()).unwrap();
553
554        let content = r#"
555FIXUP PLAN:
556
557```diff
558```
559"#;
560
561        let result = parser.parse_diffs(content);
562        // Should fail with NoValidDiffBlocks since diff block is empty
563        assert!(result.is_err());
564        assert!(matches!(result.unwrap_err(), FixupError::NoValidDiffBlocks));
565    }
566
567    #[test]
568    fn test_malformed_hunk_header() {
569        let temp_dir = TempDir::new().unwrap();
570        let parser = FixupParser::new(FixupMode::Preview, temp_dir.path().to_path_buf()).unwrap();
571
572        let content = r#"
573FIXUP PLAN:
574
575```diff
576--- a/test.txt
577+++ b/test.txt
578@@ invalid hunk header @@
579some content
580```
581"#;
582
583        let diffs = parser.parse_diffs(content).unwrap();
584        assert_eq!(diffs.len(), 1);
585        assert_eq!(diffs[0].hunks.len(), 0);
586    }
587
588    #[test]
589    fn test_no_fixup_markers() {
590        let temp_dir = TempDir::new().unwrap();
591        let parser = FixupParser::new(FixupMode::Preview, temp_dir.path().to_path_buf()).unwrap();
592
593        let content = "This is just regular review content without any fixup markers.";
594
595        let result = parser.parse_diffs(content);
596        assert!(result.is_err());
597        assert!(matches!(
598            result.unwrap_err(),
599            FixupError::NoFixupMarkersFound
600        ));
601    }
602
603    #[test]
604    fn test_case_insensitive_fixup_markers() {
605        let temp_dir = TempDir::new().unwrap();
606        let parser = FixupParser::new(FixupMode::Preview, temp_dir.path().to_path_buf()).unwrap();
607
608        // Test various case variations
609        let content1 = "fixup plan:\nSome content";
610        assert!(parser.has_fixup_markers(content1));
611
612        let content2 = "FIXUP PLAN:\nSome content";
613        assert!(parser.has_fixup_markers(content2));
614
615        let content3 = "Fixup Plan:\nSome content";
616        assert!(parser.has_fixup_markers(content3));
617
618        let content4 = "needs FIXUPS in several places";
619        assert!(parser.has_fixup_markers(content4));
620    }
621}