Skip to main content

xchecker_engine/fixup/
apply.rs

1use std::collections::HashMap;
2
3use camino::Utf8Path;
4use tempfile::TempDir;
5
6use crate::atomic_write::write_file_atomic;
7use crate::error::FixupError;
8use crate::runner::CommandSpec;
9
10use super::model::{AppliedFile, ChangeSummary, FixupMode, FixupPreview, FixupResult, UnifiedDiff};
11use super::parse::FixupParser;
12
13impl FixupParser {
14    /// Preview changes without applying them
15    ///
16    /// Line endings are normalized before calculating diff statistics (FR-FIX-010)
17    ///
18    /// # Security
19    ///
20    /// All target paths are validated through `SandboxRoot::join()` to ensure:
21    /// - Paths cannot escape the workspace root via `..` traversal
22    /// - Absolute paths are rejected
23    /// - Symlinks are rejected by default (configurable via `SandboxConfig`)
24    /// - Hardlinks are rejected by default (configurable via `SandboxConfig`)
25    pub fn preview_changes(&self, diffs: &[UnifiedDiff]) -> Result<FixupPreview, FixupError> {
26        let mut target_files = Vec::new();
27        let mut change_summary = HashMap::new();
28        let mut warnings = Vec::new();
29        let mut all_valid = true;
30
31        for diff in diffs {
32            target_files.push(diff.target_file.clone());
33
34            let mut validation_messages = Vec::new();
35            let mut validation_passed = true;
36
37            // 1. Path validation using SandboxRoot::join() via validate_target_path
38            // This replaces the legacy validate_fixup_target function with the
39            // centralized sandbox validation that provides comprehensive security checks.
40            if let Err(e) = self.validate_target_path(&diff.target_file) {
41                validation_passed = false;
42                all_valid = false;
43                let msg = format!("Invalid target path: {e}");
44                validation_messages.push(msg.clone());
45                warnings.push(format!("{}: {}", diff.target_file, msg));
46            }
47
48            // 2. Validate diff against temp copy (only if path validation passed)
49            if validation_passed {
50                match self.validate_diff_with_git_apply(diff) {
51                    Ok(messages) => {
52                        validation_messages.extend(messages);
53                    }
54                    Err(e) => {
55                        validation_passed = false;
56                        all_valid = false;
57                        warnings.push(format!("Validation failed for {}: {}", diff.target_file, e));
58                        validation_messages.push(e.to_string());
59                    }
60                }
61            }
62
63            // Calculate change statistics (with line ending normalization - FR-FIX-010)
64            let (lines_added, lines_removed) = self.calculate_change_stats(diff);
65
66            change_summary.insert(
67                diff.target_file.clone(),
68                ChangeSummary {
69                    hunk_count: diff.hunks.len(),
70                    lines_added,
71                    lines_removed,
72                    validation_passed,
73                    validation_messages,
74                },
75            );
76        }
77
78        Ok(FixupPreview {
79            target_files,
80            change_summary,
81            warnings,
82            all_valid,
83        })
84    }
85
86    /// Apply changes to files using atomic writes (FR-FIX-005, FR-FIX-006, FR-FIX-008)
87    ///
88    /// This method implements the atomic write pattern:
89    /// 1. Validate target path using SandboxPath (security check)
90    /// 2. Write to .tmp file with fsync
91    /// 3. Create .bak backup if file exists
92    /// 4. Atomic rename with Windows retry
93    /// 5. Preserve file permissions (Unix) or attributes (Windows)
94    /// 6. Record warnings if permission preservation fails
95    ///
96    /// # Security
97    ///
98    /// All target paths are validated through `SandboxRoot::join()` to ensure:
99    /// - Paths cannot escape the workspace root via `..` traversal
100    /// - Absolute paths are rejected
101    /// - Symlinks are rejected by default (configurable via `SandboxConfig`)
102    /// - Hardlinks are rejected by default (configurable via `SandboxConfig`)
103    pub fn apply_changes(&self, diffs: &[UnifiedDiff]) -> Result<FixupResult, FixupError> {
104        if self.mode != FixupMode::Apply {
105            return Err(FixupError::DiffParsingFailed {
106                reason: "Cannot apply changes in preview mode".to_string(),
107            });
108        }
109
110        let mut applied_files = Vec::new();
111        let mut failed_files = Vec::new();
112        let mut warnings = Vec::new();
113
114        for diff in diffs {
115            // Validate target path before applying using SandboxPath (hard error in apply mode)
116            // This replaces the legacy validate_fixup_target function with the
117            // centralized sandbox validation that provides comprehensive security checks.
118            if let Err(e) = self.validate_target_path(&diff.target_file) {
119                failed_files.push(diff.target_file.clone());
120                warnings.push(format!(
121                    "Path validation failed for {}: {}",
122                    diff.target_file, e
123                ));
124                continue;
125            }
126
127            match self.apply_single_diff_atomic(diff) {
128                Ok(applied_file) => {
129                    // Collect file-specific warnings
130                    for warning in &applied_file.warnings {
131                        warnings.push(format!("{}: {}", diff.target_file, warning));
132                    }
133                    applied_files.push(applied_file);
134                }
135                Err(e) => {
136                    failed_files.push(diff.target_file.clone());
137                    warnings.push(format!("Failed to apply {}: {}", diff.target_file, e));
138                }
139            }
140        }
141
142        Ok(FixupResult {
143            applied_files,
144            failed_files,
145            warnings,
146            three_way_used: false, // Not using git apply anymore
147        })
148    }
149
150    /// Apply a single diff using atomic writes with backup and permission preservation
151    ///
152    /// This method implements FR-FIX-005, FR-FIX-006, FR-FIX-007, and FR-FIX-008:
153    /// - FR-FIX-005: Atomic write with temp file + fsync + rename
154    /// - FR-FIX-006: Create .bak backup and preserve file permissions
155    /// - FR-FIX-007: Cross-filesystem fallback (copy+fsync+replace)
156    /// - FR-FIX-008: Record warnings for permission preservation failures
157    ///
158    /// # Security
159    ///
160    /// The target path is validated through `SandboxRoot::join()` before any file operations.
161    fn apply_single_diff_atomic(&self, diff: &UnifiedDiff) -> Result<AppliedFile, FixupError> {
162        use std::fs;
163
164        // Validate and get the sandboxed target path
165        // This ensures the path is within the sandbox root and passes all security checks
166        let sandbox_path = self.validate_target_path(&diff.target_file)?;
167        let target_path = sandbox_path.as_path();
168
169        if !target_path.exists() {
170            return Err(FixupError::TargetFileNotFound {
171                path: diff.target_file.clone(),
172            });
173        }
174
175        let mut file_warnings = Vec::new();
176
177        // Read original content with CRLF tolerance (FR-FS-005)
178        // Line endings will be normalized during diff application
179        let original_content =
180            fs::read_to_string(target_path).map_err(|e| FixupError::TempCopyFailed {
181                file: diff.target_file.clone(),
182                reason: format!("Failed to read original file: {e}"),
183            })?;
184
185        // Get original file permissions/attributes before modification
186        let original_metadata =
187            fs::metadata(target_path).map_err(|e| FixupError::TempCopyFailed {
188                file: diff.target_file.clone(),
189                reason: format!("Failed to get file metadata: {e}"),
190            })?;
191
192        #[cfg(unix)]
193        let original_permissions = {
194            use std::os::unix::fs::PermissionsExt;
195            Some(original_metadata.permissions().mode())
196        };
197
198        #[cfg(windows)]
199        let original_readonly = original_metadata.permissions().readonly();
200
201        // Apply the diff to get new content
202        let new_content = self.apply_diff_to_content(&original_content, diff)?;
203
204        // Compute BLAKE3 hash of new content
205        let blake3_hash = self.compute_blake3_hash(&new_content);
206        let blake3_first8 = blake3_hash[..8].to_string();
207
208        // Create .bak backup (FR-FIX-006)
209        let backup_path = target_path.with_extension("bak");
210        fs::copy(target_path, &backup_path).map_err(|e| FixupError::TempCopyFailed {
211            file: diff.target_file.clone(),
212            reason: format!("Failed to create .bak backup: {e}"),
213        })?;
214
215        // Convert PathBuf to Utf8Path for atomic_write
216        let target_utf8_path =
217            Utf8Path::from_path(target_path).ok_or_else(|| FixupError::TempCopyFailed {
218                file: diff.target_file.clone(),
219                reason: "Path contains invalid UTF-8".to_string(),
220            })?;
221
222        // Use centralized atomic write with cross-filesystem fallback (FR-FIX-005, FR-FIX-007)
223        let write_result = write_file_atomic(target_utf8_path, &new_content).map_err(|e| {
224            FixupError::TempCopyFailed {
225                file: diff.target_file.clone(),
226                reason: format!("Failed to write file atomically: {e}"),
227            }
228        })?;
229
230        // Collect warnings from atomic write (FR-FIX-007, FR-FIX-008)
231        file_warnings.extend(write_result.warnings);
232
233        // Preserve file permissions (FR-FIX-006, FR-FIX-008)
234        #[cfg(unix)]
235        {
236            if let Some(mode) = original_permissions {
237                use std::os::unix::fs::PermissionsExt;
238                let permissions = fs::Permissions::from_mode(mode);
239                if let Err(e) = fs::set_permissions(target_path, permissions) {
240                    file_warnings.push(format!("Failed to preserve file permissions: {}", e));
241                }
242            }
243        }
244
245        #[cfg(windows)]
246        {
247            if let Ok(metadata) = fs::metadata(target_path) {
248                let mut permissions = metadata.permissions();
249                permissions.set_readonly(original_readonly);
250                if let Err(e) = fs::set_permissions(target_path, permissions) {
251                    file_warnings.push(format!("Failed to preserve file attributes: {e}"));
252                }
253            }
254        }
255
256        Ok(AppliedFile {
257            path: diff.target_file.clone(),
258            blake3_first8,
259            applied: true,
260            warnings: file_warnings,
261        })
262    }
263
264    /// Apply a diff to content string with fuzzy matching support
265    ///
266    /// Line endings are normalized to LF before applying the diff (FR-FIX-010).
267    /// If the hunk's expected line position doesn't match, we search within a
268    /// window (+/- FUZZY_SEARCH_WINDOW lines) to find the best matching context.
269    fn apply_diff_to_content(
270        &self,
271        content: &str,
272        diff: &UnifiedDiff,
273    ) -> Result<String, FixupError> {
274        const FUZZY_SEARCH_WINDOW: usize = 50;
275        const MIN_CONTEXT_MATCH_RATIO: f64 = 0.7;
276
277        // Normalize line endings before applying diff (FR-FIX-010, FR-FS-005)
278        let normalized_content = normalize_line_endings_for_diff(content);
279        let mut lines: Vec<String> = normalized_content
280            .lines()
281            .map(std::string::ToString::to_string)
282            .collect();
283
284        // Track cumulative offset from previous hunks' additions/deletions
285        let mut cumulative_offset: i64 = 0;
286
287        // Apply each hunk in order
288        for hunk in &diff.hunks {
289            let (old_start, _old_count) = hunk.old_range;
290            let hunk_lines: Vec<&str> = hunk.content.lines().collect();
291
292            // Extract OLD lines from hunk (context lines + removed lines, NOT added lines)
293            // These represent what exists in the original file and should be used for matching
294            let context_lines: Vec<&str> = hunk_lines
295                .iter()
296                .skip(1) // Skip @@ header
297                .filter(|line| {
298                    // Include context lines (starting with ' ') and removed lines (starting with '-')
299                    // but NOT added lines (starting with '+') as those don't exist in the original file
300                    line.starts_with(' ')
301                        || line.starts_with('-')
302                        || (!line.starts_with('+') && !line.starts_with("@@"))
303                })
304                .filter(|line| !line.starts_with("---")) // Exclude --- header
305                .map(|line| {
306                    // Strip the prefix character (' ' or '-')
307                    if line.starts_with(' ') || line.starts_with('-') {
308                        &line[1..]
309                    } else {
310                        *line
311                    }
312                })
313                .collect();
314
315            // Calculate expected position with cumulative offset
316            let expected_pos = ((old_start as i64 - 1) + cumulative_offset).max(0) as usize;
317
318            // Try exact match first, then fuzzy match if needed
319            let actual_start = if self.context_matches_at(&lines, expected_pos, &context_lines) {
320                expected_pos
321            } else {
322                // Fuzzy search within window
323                match self.find_best_context_match(
324                    &lines,
325                    expected_pos,
326                    &context_lines,
327                    FUZZY_SEARCH_WINDOW,
328                    MIN_CONTEXT_MATCH_RATIO,
329                ) {
330                    Some((pos, _confidence)) => {
331                        tracing::warn!(
332                            "Fuzzy match: hunk at line {} shifted to line {} in '{}'",
333                            old_start,
334                            pos + 1,
335                            diff.target_file
336                        );
337                        pos
338                    }
339                    None => {
340                        return Err(FixupError::FuzzyMatchFailed {
341                            file: diff.target_file.clone(),
342                            expected_line: old_start,
343                            search_window: FUZZY_SEARCH_WINDOW,
344                        });
345                    }
346                }
347            };
348
349            // Track additions and deletions for offset calculation
350            let mut additions = 0i64;
351            let mut deletions = 0i64;
352
353            // Apply hunk at actual_start position
354            let mut hunk_idx = 1; // Start after @@ line
355            let mut file_idx = actual_start;
356
357            while hunk_idx < hunk_lines.len() {
358                let line = hunk_lines[hunk_idx];
359
360                if line.starts_with('+') && !line.starts_with("+++") {
361                    // Add line
362                    let new_line = line[1..].to_string();
363                    if file_idx <= lines.len() {
364                        lines.insert(file_idx, new_line);
365                    } else {
366                        lines.push(new_line);
367                    }
368                    file_idx += 1;
369                    additions += 1;
370                } else if line.starts_with('-') && !line.starts_with("---") {
371                    // Remove line
372                    if file_idx < lines.len() {
373                        lines.remove(file_idx);
374                        deletions += 1;
375                    }
376                } else if line.starts_with(' ') {
377                    // Context line - just advance
378                    file_idx += 1;
379                } else if !line.starts_with("@@") {
380                    // Context line without leading space
381                    file_idx += 1;
382                }
383
384                hunk_idx += 1;
385            }
386
387            // Update cumulative offset for subsequent hunks
388            cumulative_offset += additions - deletions;
389        }
390
391        Ok(lines.join("\n") + "\n")
392    }
393
394    /// Compute BLAKE3 hash of content
395    fn compute_blake3_hash(&self, content: &str) -> String {
396        let hash = blake3::hash(content.as_bytes());
397        hash.to_hex().to_string()
398    }
399
400    /// Apply changes to files (legacy git apply method - kept for compatibility)
401    #[allow(dead_code)]
402    pub fn apply_changes_with_git(&self, diffs: &[UnifiedDiff]) -> Result<FixupResult, FixupError> {
403        if self.mode != FixupMode::Apply {
404            return Err(FixupError::DiffParsingFailed {
405                reason: "Cannot apply changes in preview mode".to_string(),
406            });
407        }
408
409        let mut applied_files = Vec::new();
410        let mut failed_files = Vec::new();
411        let mut warnings = Vec::new();
412        let mut three_way_used = false;
413
414        for diff in diffs {
415            match self.apply_single_diff(diff) {
416                Ok(used_three_way) => {
417                    applied_files.push(AppliedFile {
418                        path: diff.target_file.clone(),
419                        blake3_first8: "00000000".to_string(), // Not computed in git apply mode
420                        applied: true,
421                        warnings: Vec::new(),
422                    });
423                    if used_three_way {
424                        three_way_used = true;
425                        warnings.push(format!("Used 3-way merge for {}", diff.target_file));
426                    }
427                }
428                Err(e) => {
429                    failed_files.push(diff.target_file.clone());
430                    warnings.push(format!("Failed to apply {}: {}", diff.target_file, e));
431                }
432            }
433        }
434
435        Ok(FixupResult {
436            applied_files,
437            failed_files,
438            warnings,
439            three_way_used,
440        })
441    }
442
443    /// Validate a diff using git apply --check
444    ///
445    /// # Security
446    ///
447    /// The target path is validated through `SandboxRoot::join()` before any file operations.
448    fn validate_diff_with_git_apply(&self, diff: &UnifiedDiff) -> Result<Vec<String>, FixupError> {
449        // Validate and get the sandboxed target path
450        let sandbox_path = self.validate_target_path(&diff.target_file)?;
451        let target_path = sandbox_path.as_path();
452
453        if !target_path.exists() {
454            return Err(FixupError::TargetFileNotFound {
455                path: diff.target_file.clone(),
456            });
457        }
458
459        // Create temporary directory and copy target file
460        let temp_dir = TempDir::new().map_err(|e| FixupError::TempCopyFailed {
461            file: diff.target_file.clone(),
462            reason: e.to_string(),
463        })?;
464
465        let temp_file = temp_dir.path().join("target_file");
466        std::fs::copy(target_path, &temp_file).map_err(|e| FixupError::TempCopyFailed {
467            file: diff.target_file.clone(),
468            reason: e.to_string(),
469        })?;
470
471        // Write diff to temporary file
472        let diff_file = temp_dir.path().join("changes.diff");
473        std::fs::write(&diff_file, &diff.diff_content).map_err(|e| FixupError::TempCopyFailed {
474            file: "diff".to_string(),
475            reason: e.to_string(),
476        })?;
477
478        // Run git apply --check using CommandSpec for secure argv-style execution
479        let output = CommandSpec::new("git")
480            .args(["apply", "--check", "--verbose"])
481            .arg(&diff_file)
482            .cwd(temp_dir.path())
483            .to_command()
484            .output()
485            .map_err(|e| FixupError::GitApplyValidationFailed {
486                target_file: diff.target_file.clone(),
487                reason: format!("Failed to run git apply: {e}"),
488            })?;
489
490        if !output.status.success() {
491            let stderr = String::from_utf8_lossy(&output.stderr);
492            return Err(FixupError::GitApplyValidationFailed {
493                target_file: diff.target_file.clone(),
494                reason: stderr.to_string(),
495            });
496        }
497
498        // Return any warnings from stdout/stderr
499        let stdout = String::from_utf8_lossy(&output.stdout);
500        let stderr = String::from_utf8_lossy(&output.stderr);
501
502        let mut messages = Vec::new();
503        if !stdout.is_empty() {
504            messages.push(stdout.to_string());
505        }
506        if !stderr.is_empty() {
507            messages.push(stderr.to_string());
508        }
509
510        Ok(messages)
511    }
512
513    /// Apply a single diff to its target file
514    ///
515    /// # Security
516    ///
517    /// The target path is validated through `SandboxRoot::join()` before any file operations.
518    fn apply_single_diff(&self, diff: &UnifiedDiff) -> Result<bool, FixupError> {
519        // Validate and get the sandboxed target path
520        let sandbox_path = self.validate_target_path(&diff.target_file)?;
521        let target_path = sandbox_path.as_path();
522
523        if !target_path.exists() {
524            return Err(FixupError::TargetFileNotFound {
525                path: diff.target_file.clone(),
526            });
527        }
528
529        // Write diff to temporary file
530        let temp_dir = TempDir::new().map_err(|e| FixupError::TempCopyFailed {
531            file: diff.target_file.clone(),
532            reason: e.to_string(),
533        })?;
534
535        let diff_file = temp_dir.path().join("changes.diff");
536        std::fs::write(&diff_file, &diff.diff_content).map_err(|e| FixupError::TempCopyFailed {
537            file: "diff".to_string(),
538            reason: e.to_string(),
539        })?;
540
541        // First try git apply --check using CommandSpec for secure argv-style execution
542        let check_output = CommandSpec::new("git")
543            .args(["apply", "--check"])
544            .arg(&diff_file)
545            .cwd(self.base_dir())
546            .to_command()
547            .output()
548            .map_err(|e| FixupError::GitApplyValidationFailed {
549                target_file: diff.target_file.clone(),
550                reason: format!("Failed to run git apply --check: {e}"),
551            })?;
552
553        if !check_output.status.success() {
554            // Try with 3-way merge as last resort using CommandSpec
555            let three_way_output = CommandSpec::new("git")
556                .args(["apply", "--3way"])
557                .arg(&diff_file)
558                .cwd(self.base_dir())
559                .to_command()
560                .output()
561                .map_err(|e| FixupError::GitApplyExecutionFailed {
562                    target_file: diff.target_file.clone(),
563                    reason: format!("Failed to run git apply --3way: {e}"),
564                })?;
565
566            if !three_way_output.status.success() {
567                let stderr = String::from_utf8_lossy(&three_way_output.stderr);
568                return Err(FixupError::GitApplyExecutionFailed {
569                    target_file: diff.target_file.clone(),
570                    reason: stderr.to_string(),
571                });
572            }
573
574            return Ok(true); // Used 3-way merge
575        }
576
577        // Apply the diff normally using CommandSpec for secure argv-style execution
578        let apply_output = CommandSpec::new("git")
579            .args(["apply"])
580            .arg(&diff_file)
581            .cwd(self.base_dir())
582            .to_command()
583            .output()
584            .map_err(|e| FixupError::GitApplyExecutionFailed {
585                target_file: diff.target_file.clone(),
586                reason: format!("Failed to run git apply: {e}"),
587            })?;
588
589        if !apply_output.status.success() {
590            let stderr = String::from_utf8_lossy(&apply_output.stderr);
591            return Err(FixupError::GitApplyExecutionFailed {
592                target_file: diff.target_file.clone(),
593                reason: stderr.to_string(),
594            });
595        }
596
597        Ok(false) // Did not use 3-way merge
598    }
599
600    /// Calculate change statistics from a diff
601    fn calculate_change_stats(&self, diff: &UnifiedDiff) -> (usize, usize) {
602        let mut lines_added = 0;
603        let mut lines_removed = 0;
604
605        for hunk in &diff.hunks {
606            for line in hunk.content.lines() {
607                if line.starts_with('+') && !line.starts_with("+++") {
608                    lines_added += 1;
609                } else if line.starts_with('-') && !line.starts_with("---") {
610                    lines_removed += 1;
611                }
612            }
613        }
614
615        (lines_added, lines_removed)
616    }
617}
618
619/// Normalize line endings to LF (FR-FIX-010, FR-FS-004, FR-FS-005)
620///
621/// This function converts all line ending styles (CRLF, CR, LF) to LF.
622/// This ensures consistent diff calculation regardless of the source file's
623/// line ending style, which is especially important on Windows where files
624/// may have CRLF line endings.
625///
626/// # Arguments
627///
628/// * `content` - The content to normalize
629///
630/// # Returns
631///
632/// A string with all line endings normalized to LF (\n)
633///
634/// # Examples
635///
636/// ```
637/// use xchecker_engine::fixup::normalize_line_endings_for_diff;
638///
639/// let crlf_content = "line1\r\nline2\r\n";
640/// let normalized = normalize_line_endings_for_diff(crlf_content);
641/// assert_eq!(normalized, "line1\nline2\n");
642/// ```
643#[must_use]
644pub fn normalize_line_endings_for_diff(content: &str) -> String {
645    content.replace("\r\n", "\n").replace('\r', "\n")
646}
647
648#[cfg(test)]
649mod tests {
650    use super::*;
651    use tempfile::TempDir;
652
653    #[test]
654    fn test_calculate_change_stats() {
655        let temp_dir = TempDir::new().unwrap();
656        let parser = FixupParser::new(FixupMode::Preview, temp_dir.path().to_path_buf()).unwrap();
657
658        let content = r#"
659FIXUP PLAN:
660
661```diff
662--- a/src/test.rs
663+++ b/src/test.rs
664@@ -1,5 +1,6 @@
665 fn test() {
666+    let x = 1;
667     let y = 2;
668-    let z = 3;
669+    let z = 4;
670     println!("test");
671 }
672```
673"#;
674
675        let diffs = parser.parse_diffs(content).unwrap();
676        assert_eq!(diffs.len(), 1);
677
678        let (added, removed) = parser.calculate_change_stats(&diffs[0]);
679        assert_eq!(added, 2); // +let x = 1; and +let z = 4;
680        assert_eq!(removed, 1); // -let z = 3;
681    }
682}