Skip to main content

codex_patcher/
edit.rs

1use std::fs;
2use std::io::Write;
3use std::path::{Path, PathBuf};
4use thiserror::Error;
5use xxhash_rust::xxh3::xxh3_64;
6
7/// The fundamental edit primitive: byte-span replacement with verification.
8///
9/// All high-level operations (AST transforms, structural edits, diffs) compile down
10/// to this single primitive. Intelligence lives in span acquisition, not application.
11#[derive(Debug, Clone, PartialEq, Eq)]
12#[must_use = "Edit does nothing until apply() is called"]
13pub struct Edit {
14    /// Path to the file to edit (absolute, workspace-relative resolved)
15    pub file: PathBuf,
16    /// Starting byte offset (inclusive)
17    pub byte_start: usize,
18    /// Ending byte offset (exclusive)
19    pub byte_end: usize,
20    /// New text to insert at [byte_start, byte_end)
21    pub new_text: String,
22    /// Verification of what we expect to find before applying
23    pub expected_before: EditVerification,
24}
25
26/// Verification strategy for edit safety.
27#[derive(Debug, Clone, PartialEq, Eq)]
28pub enum EditVerification {
29    /// Exact text match required
30    ExactMatch(String),
31    /// xxh3 hash of expected text (faster for large spans)
32    Hash(u64),
33}
34
35impl EditVerification {
36    /// Check if the provided text matches the verification criteria.
37    pub fn matches(&self, text: &str) -> bool {
38        match self {
39            EditVerification::ExactMatch(expected) => text == expected,
40            EditVerification::Hash(expected_hash) => {
41                let actual_hash = xxh3_64(text.as_bytes());
42                actual_hash == *expected_hash
43            }
44        }
45    }
46
47    /// Create verification from text, using hash for text over 1KB.
48    pub fn from_text(text: &str) -> Self {
49        if text.len() > 1024 {
50            EditVerification::Hash(xxh3_64(text.as_bytes()))
51        } else {
52            EditVerification::ExactMatch(text.to_string())
53        }
54    }
55
56    /// Get hash value regardless of variant.
57    pub fn hash(&self) -> u64 {
58        match self {
59            EditVerification::Hash(h) => *h,
60            EditVerification::ExactMatch(text) => xxh3_64(text.as_bytes()),
61        }
62    }
63}
64
65#[derive(Error, Debug)]
66pub enum EditError {
67    #[error("Before-text verification failed at {file}:{byte_start}")]
68    BeforeTextMismatch {
69        file: PathBuf,
70        byte_start: usize,
71        byte_end: usize,
72        expected: String,
73        found: String,
74    },
75
76    #[error("Invalid byte range: [{byte_start}, {byte_end}) in file of length {file_len}")]
77    InvalidByteRange {
78        byte_start: usize,
79        byte_end: usize,
80        file_len: usize,
81    },
82
83    #[error("Cannot edit file outside workspace: {0}")]
84    OutsideWorkspace(PathBuf),
85
86    #[error("File I/O error: {0}")]
87    Io(#[from] std::io::Error),
88
89    #[error("UTF-8 validation error: {0}")]
90    Utf8(#[from] std::str::Utf8Error),
91
92    #[error("Invalid edit would create malformed UTF-8")]
93    InvalidUtf8Edit,
94}
95
96impl Clone for EditError {
97    fn clone(&self) -> Self {
98        match self {
99            Self::BeforeTextMismatch {
100                file,
101                byte_start,
102                byte_end,
103                expected,
104                found,
105            } => Self::BeforeTextMismatch {
106                file: file.clone(),
107                byte_start: *byte_start,
108                byte_end: *byte_end,
109                expected: expected.clone(),
110                found: found.clone(),
111            },
112            Self::InvalidByteRange {
113                byte_start,
114                byte_end,
115                file_len,
116            } => Self::InvalidByteRange {
117                byte_start: *byte_start,
118                byte_end: *byte_end,
119                file_len: *file_len,
120            },
121            Self::OutsideWorkspace(p) => Self::OutsideWorkspace(p.clone()),
122            Self::Io(e) => Self::Io(std::io::Error::new(e.kind(), e.to_string())),
123            Self::Utf8(e) => Self::Utf8(*e),
124            Self::InvalidUtf8Edit => Self::InvalidUtf8Edit,
125        }
126    }
127}
128
/// Outcome of applying one edit.
#[derive(Clone, Debug, Eq, PartialEq)]
#[must_use = "EditResult should be checked for success/already-applied"]
pub enum EditResult {
    /// The span was replaced.
    Applied {
        /// File that was modified.
        file: PathBuf,
        /// Length in bytes of the text that was written into the span
        /// (the replacement's length, not a size delta).
        bytes_changed: usize,
    },
    /// Nothing was replaced because the span already held `new_text`.
    AlreadyApplied {
        /// File that was inspected.
        file: PathBuf,
    },
}
138
139impl Edit {
140    /// Create a new edit with automatic verification generation.
141    pub fn new(
142        file: impl Into<PathBuf>,
143        byte_start: usize,
144        byte_end: usize,
145        new_text: impl Into<String>,
146        expected_before: impl Into<String>,
147    ) -> Self {
148        let expected = expected_before.into();
149        Self {
150            file: file.into(),
151            byte_start,
152            byte_end,
153            new_text: new_text.into(),
154            expected_before: EditVerification::from_text(&expected),
155        }
156    }
157
158    /// Create an edit with explicit verification strategy.
159    pub fn with_verification(
160        file: impl Into<PathBuf>,
161        byte_start: usize,
162        byte_end: usize,
163        new_text: impl Into<String>,
164        verification: EditVerification,
165    ) -> Self {
166        Self {
167            file: file.into(),
168            byte_start,
169            byte_end,
170            new_text: new_text.into(),
171            expected_before: verification,
172        }
173    }
174
175    /// Validate the edit against the current file contents.
176    ///
177    /// Returns the current text at [byte_start, byte_end) if validation succeeds.
178    fn validate<'a>(&self, content: &'a [u8]) -> Result<&'a [u8], EditError> {
179        // Validate byte range
180        if self.byte_start > self.byte_end {
181            return Err(EditError::InvalidByteRange {
182                byte_start: self.byte_start,
183                byte_end: self.byte_end,
184                file_len: content.len(),
185            });
186        }
187
188        if self.byte_end > content.len() {
189            return Err(EditError::InvalidByteRange {
190                byte_start: self.byte_start,
191                byte_end: self.byte_end,
192                file_len: content.len(),
193            });
194        }
195
196        // Extract current text at span
197        let current_bytes = &content[self.byte_start..self.byte_end];
198        let current_text = std::str::from_utf8(current_bytes)?;
199
200        // Check if already applied (idempotency)
201        if current_text == self.new_text {
202            return Ok(current_bytes);
203        }
204
205        // Verify expected before-text
206        if !self.expected_before.matches(current_text) {
207            return Err(EditError::BeforeTextMismatch {
208                file: self.file.clone(),
209                byte_start: self.byte_start,
210                byte_end: self.byte_end,
211                expected: format!("{:?}", self.expected_before),
212                found: current_text.to_string(),
213            });
214        }
215
216        Ok(current_bytes)
217    }
218
219    /// Apply this edit to the file system atomically.
220    ///
221    /// Uses tempfile + fsync + rename for crash safety.
222    pub fn apply(&self) -> Result<EditResult, EditError> {
223        // Read current file
224        let original_content = fs::read(&self.file)?;
225
226        // Validate edit
227        let current_bytes = self.validate(&original_content)?;
228
229        // Check idempotency
230        if std::str::from_utf8(current_bytes)? == self.new_text {
231            return Ok(EditResult::AlreadyApplied {
232                file: self.file.clone(),
233            });
234        }
235
236        // Build new content
237        let mut new_content = Vec::with_capacity(
238            original_content.len() + self.new_text.len() - (self.byte_end - self.byte_start),
239        );
240        new_content.extend_from_slice(&original_content[..self.byte_start]);
241        new_content.extend_from_slice(self.new_text.as_bytes());
242        new_content.extend_from_slice(&original_content[self.byte_end..]);
243
244        // Validate resulting content is valid UTF-8
245        std::str::from_utf8(&new_content).map_err(|_| EditError::InvalidUtf8Edit)?;
246
247        // Atomic write: tempfile in same directory, fsync, rename
248        atomic_write(&self.file, &new_content)?;
249
250        // Update mtime to invalidate incremental compilation
251        let now = filetime::FileTime::now();
252        filetime::set_file_mtime(&self.file, now)?;
253
254        Ok(EditResult::Applied {
255            file: self.file.clone(),
256            bytes_changed: self.new_text.len(),
257        })
258    }
259
260    /// Apply multiple edits to the same file in a single atomic operation.
261    ///
262    /// Edits are sorted by byte_start descending and applied bottom-to-top
263    /// to avoid offset invalidation.
264    pub fn apply_batch(mut edits: Vec<Edit>) -> Result<Vec<EditResult>, EditError> {
265        if edits.is_empty() {
266            return Ok(Vec::new());
267        }
268
269        // Group by file
270        edits.sort_by(|a, b| {
271            a.file.cmp(&b.file).then(b.byte_start.cmp(&a.byte_start)) // Descending by byte_start
272        });
273
274        let mut results = Vec::with_capacity(edits.len());
275        let mut current_file = None;
276        let mut file_edits = Vec::new();
277
278        for edit in edits {
279            match &current_file {
280                None => {
281                    current_file = Some(edit.file.clone());
282                    file_edits.push(edit);
283                }
284                Some(path) if path == &edit.file => {
285                    file_edits.push(edit);
286                }
287                Some(_) => {
288                    // File changed, apply accumulated edits
289                    results.extend(apply_file_edits(&file_edits)?);
290                    file_edits.clear();
291                    current_file = Some(edit.file.clone());
292                    file_edits.push(edit);
293                }
294            }
295        }
296
297        // Apply remaining edits
298        if !file_edits.is_empty() {
299            results.extend(apply_file_edits(&file_edits)?);
300        }
301
302        Ok(results)
303    }
304}
305
306/// Apply multiple edits to a single file atomically.
307///
308/// Assumes edits are sorted by byte_start descending.
309fn apply_file_edits(edits: &[Edit]) -> Result<Vec<EditResult>, EditError> {
310    if edits.is_empty() {
311        return Ok(Vec::new());
312    }
313
314    let file = &edits[0].file;
315    let original_content = fs::read(file)?;
316
317    // Validate all edits first
318    for edit in edits {
319        edit.validate(&original_content)?;
320    }
321
322    // Verify descending sort invariant before applying
323    debug_assert!(
324        edits.windows(2).all(|w| w[0].byte_start >= w[1].byte_start),
325        "edits must be sorted descending by byte_start"
326    );
327
328    // Check for overlapping spans (edits are sorted descending by byte_start)
329    // window[0] has the higher byte_start, window[1] has the lower byte_start.
330    // For non-overlapping regions: the lower edit's end must not exceed the higher edit's start.
331    for window in edits.windows(2) {
332        let (higher, lower) = (&window[0], &window[1]);
333        if lower.byte_end > higher.byte_start {
334            return Err(EditError::InvalidByteRange {
335                byte_start: higher.byte_start,
336                byte_end: lower.byte_end,
337                file_len: original_content.len(),
338            });
339        }
340    }
341
342    // Apply edits bottom-to-top (already sorted descending)
343    let mut new_content = original_content.clone();
344    let mut results = Vec::with_capacity(edits.len());
345
346    for edit in edits {
347        let current_bytes = &new_content[edit.byte_start..edit.byte_end];
348        let current_text = std::str::from_utf8(current_bytes)?;
349
350        // Check idempotency
351        if current_text == edit.new_text {
352            results.push(EditResult::AlreadyApplied {
353                file: edit.file.clone(),
354            });
355            continue;
356        }
357
358        // Splice in new text
359        new_content.splice(
360            edit.byte_start..edit.byte_end,
361            edit.new_text.as_bytes().iter().copied(),
362        );
363
364        results.push(EditResult::Applied {
365            file: edit.file.clone(),
366            bytes_changed: edit.new_text.len(),
367        });
368    }
369
370    // Validate resulting content is valid UTF-8
371    std::str::from_utf8(&new_content).map_err(|_| EditError::InvalidUtf8Edit)?;
372
373    // Atomic write
374    atomic_write(file, &new_content)?;
375
376    // Update mtime
377    let now = filetime::FileTime::now();
378    filetime::set_file_mtime(file, now)?;
379
380    Ok(results)
381}
382
383/// Atomic file write: tempfile + fsync + rename.
384///
385/// This ensures crash safety - either the full write succeeds or nothing changes.
386fn atomic_write(path: &Path, content: &[u8]) -> Result<(), EditError> {
387    // Create tempfile in same directory to ensure same filesystem
388    let parent = path.parent().ok_or_else(|| {
389        EditError::Io(std::io::Error::new(
390            std::io::ErrorKind::InvalidInput,
391            "Path has no parent directory",
392        ))
393    })?;
394
395    let mut temp = tempfile::NamedTempFile::new_in(parent)?;
396
397    // Write content
398    temp.write_all(content)?;
399
400    // Flush to disk (fsync)
401    temp.as_file().sync_all()?;
402
403    // Atomic rename
404    temp.persist(path).map_err(|e| e.error)?;
405
406    Ok(())
407}
408
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates a temporary directory containing `test.txt` with `initial` as
    /// its content. The directory guard is returned so the file stays alive
    /// for the duration of the test.
    fn scratch_file(initial: &[u8]) -> (tempfile::TempDir, PathBuf) {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("test.txt");
        fs::write(&path, initial).unwrap();
        (dir, path)
    }

    // --- EditVerification -------------------------------------------------

    #[test]
    fn test_edit_verification_exact_match() {
        let verification = EditVerification::ExactMatch("hello world".to_string());
        assert!(verification.matches("hello world"));
        assert!(!verification.matches("hello"));
    }

    #[test]
    fn test_edit_verification_hash() {
        let verification = EditVerification::Hash(xxh3_64("hello world".as_bytes()));
        assert!(verification.matches("hello world"));
        assert!(!verification.matches("goodbye world"));
    }

    #[test]
    fn test_edit_verification_from_text_small() {
        // Short text is kept verbatim.
        assert!(matches!(
            EditVerification::from_text("small"),
            EditVerification::ExactMatch(_)
        ));
    }

    #[test]
    fn test_edit_verification_from_text_large() {
        // Text over the size threshold is reduced to a hash.
        let big = "x".repeat(2000);
        assert!(matches!(
            EditVerification::from_text(&big),
            EditVerification::Hash(_)
        ));
    }

    // --- Edit::validate ---------------------------------------------------

    #[test]
    fn test_edit_validation_invalid_range() {
        // End offset lies past the end of the 11-byte content.
        let edit = Edit::new("test.txt", 5, 20, "replacement", "");
        assert!(matches!(
            edit.validate(b"hello world"),
            Err(EditError::InvalidByteRange { .. })
        ));
    }

    #[test]
    fn test_edit_validation_inverted_range() {
        // Start offset is greater than the end offset.
        let edit = Edit::new("test.txt", 10, 5, "replacement", "");
        assert!(matches!(
            edit.validate(b"hello world"),
            Err(EditError::InvalidByteRange { .. })
        ));
    }

    #[test]
    fn test_edit_idempotency_check() {
        // Replacement equals what is already there: validation must pass.
        let edit = Edit::new("test.txt", 0, 5, "hello", "hello");
        assert!(edit.validate(b"hello world").is_ok());
    }

    // --- Applying to real files -------------------------------------------

    #[test]
    fn test_atomic_write_integration() {
        let (_dir, path) = scratch_file(b"original content");

        let outcome = Edit::new(&path, 0, 8, "modified", "original")
            .apply()
            .unwrap();

        assert!(matches!(outcome, EditResult::Applied { .. }));
        assert_eq!(fs::read_to_string(&path).unwrap(), "modified content");
    }

    #[test]
    fn test_edit_idempotency_application() {
        let (_dir, path) = scratch_file(b"hello world");

        let outcome = Edit::new(&path, 0, 5, "hello", "hello").apply().unwrap();

        assert!(matches!(outcome, EditResult::AlreadyApplied { .. }));
        assert_eq!(fs::read_to_string(&path).unwrap(), "hello world");
    }

    #[test]
    fn test_batch_edits_same_file() {
        let (_dir, path) = scratch_file(b"line1\nline2\nline3\n");

        let batch = vec![
            Edit::new(&path, 0, 5, "LINE1", "line1"),
            Edit::new(&path, 6, 11, "LINE2", "line2"),
            Edit::new(&path, 12, 17, "LINE3", "line3"),
        ];

        let outcomes = Edit::apply_batch(batch).unwrap();
        assert_eq!(outcomes.len(), 3);
        assert_eq!(
            fs::read_to_string(&path).unwrap(),
            "LINE1\nLINE2\nLINE3\n"
        );
    }
}