// aft/edit.rs

//! Shared edit engine: byte-offset conversion, content replacement,
//! syntax validation, and auto-backup orchestration.
//!
//! Used by `write`, `edit_symbol`, `edit_match`, and `batch` commands.
5
6#![cfg_attr(test, allow(clippy::items_after_test_module))]
7
8use std::path::Path;
9
10use crate::config::Config;
11use crate::context::AppContext;
12use crate::error::AftError;
13use crate::format;
14use crate::parser::{detect_language, grammar_for, FileParser};
15
/// Translate a 0-indexed `(line, col)` position into a byte offset in `source`.
///
/// `col` is a byte offset from the start of the line (tree-sitter columns are
/// byte-indexed), not a character count. It is clamped to the length of the
/// line, excluding the line terminator.
///
/// Lines are split on `\n`, `\r\n`, and a lone `\r`, so LF and CRLF files are
/// both counted correctly. When `line` lies past the last line of the file,
/// `source.len()` is returned.
pub fn line_col_to_byte(source: &str, line: u32, col: u32) -> usize {
    let raw = source.as_bytes();
    let total = raw.len();
    let mut lines_to_skip = line as usize;
    let mut start = 0usize;

    loop {
        // Index of the first terminator byte at or after `start`, or EOF when
        // the current line is the last one and has no terminator.
        let end = raw[start..]
            .iter()
            .position(|&b| b == b'\n' || b == b'\r')
            .map_or(total, |offset| start + offset);

        if lines_to_skip == 0 {
            return start + (col as usize).min(end - start);
        }
        if end == total {
            // Ran out of lines before reaching the requested one.
            return total;
        }

        // A `\r\n` pair counts as a single terminator.
        let is_crlf = raw[end] == b'\r' && raw.get(end + 1) == Some(&b'\n');
        start = end + if is_crlf { 2 } else { 1 };
        lines_to_skip -= 1;
    }
}
54
55/// Replace bytes in `[start..end)` with `replacement`.
56///
57/// Returns an error if the range is invalid or does not align to UTF-8 char boundaries.
58pub fn replace_byte_range(
59    source: &str,
60    start: usize,
61    end: usize,
62    replacement: &str,
63) -> Result<String, AftError> {
64    if start > end {
65        return Err(AftError::InvalidRequest {
66            message: format!(
67                "invalid byte range [{}..{}): start must be <= end",
68                start, end
69            ),
70        });
71    }
72    if end > source.len() {
73        return Err(AftError::InvalidRequest {
74            message: format!(
75                "invalid byte range [{}..{}): end exceeds source length {}",
76                start,
77                end,
78                source.len()
79            ),
80        });
81    }
82    if !source.is_char_boundary(start) {
83        return Err(AftError::InvalidRequest {
84            message: format!(
85                "invalid byte range [{}..{}): start is not a char boundary",
86                start, end
87            ),
88        });
89    }
90    if !source.is_char_boundary(end) {
91        return Err(AftError::InvalidRequest {
92            message: format!(
93                "invalid byte range [{}..{}): end is not a char boundary",
94                start, end
95            ),
96        });
97    }
98
99    let mut result = String::with_capacity(
100        source.len().saturating_sub(end.saturating_sub(start)) + replacement.len(),
101    );
102    result.push_str(&source[..start]);
103    result.push_str(replacement);
104    result.push_str(&source[end..]);
105    Ok(result)
106}
107
108/// Validate syntax of a file using a fresh FileParser (D023).
109///
110/// Returns `Ok(Some(true))` if syntax is valid, `Ok(Some(false))` if there are
111/// parse errors, and `Ok(None)` if the language is unsupported.
112pub fn validate_syntax(path: &Path) -> Result<Option<bool>, AftError> {
113    let mut parser = FileParser::new();
114    match parser.parse(path) {
115        Ok((tree, _lang)) => Ok(Some(!tree.root_node().has_error())),
116        Err(AftError::InvalidRequest { .. }) => {
117            // Unsupported language — not an error, just can't validate
118            Ok(None)
119        }
120        Err(e) => Err(e),
121    }
122}
123
124/// Validate syntax of an in-memory string without touching disk.
125///
126/// Uses `detect_language(path)` + `grammar_for(lang)` + `parser.parse()`
127/// to validate syntax of a proposed content string. Returns `None` for
128/// unsupported languages, `Some(true)` for valid, `Some(false)` for invalid.
129pub fn validate_syntax_str(content: &str, path: &Path) -> Option<bool> {
130    let lang = detect_language(path)?;
131    let grammar = grammar_for(lang);
132    let mut parser = tree_sitter::Parser::new();
133    if parser.set_language(&grammar).is_err() {
134        return None;
135    }
136    let tree = parser.parse(content.as_bytes(), None)?;
137    Some(!tree.root_node().has_error())
138}
139
140/// Check if the caller requested diff info in the response.
141///
142/// `include_diff` yields a compact counts-only diff (`additions`/`deletions`),
143/// which is what agent-facing/raw consumers should use — the payload does not
144/// scale with file size. Full before/after content requires the separate
145/// `include_diff_content` flag (UI metadata only); see [`wants_diff_content`].
146pub fn wants_diff(params: &serde_json::Value) -> bool {
147    params
148        .get("include_diff")
149        .and_then(|v| v.as_bool())
150        .unwrap_or(false)
151        || wants_diff_content(params)
152}
153
154/// Check if the caller requested the full before/after file contents in the
155/// diff. This is for UI rendering only (e.g. the OpenCode/Pi plugins building a
156/// diff view in tool metadata) and is deliberately NOT the default: full
157/// content makes the response scale with file size, not edit size, which floods
158/// agent context on large files. Agent-facing/raw consumers should pass
159/// `include_diff` (counts only) instead.
160pub fn wants_diff_content(params: &serde_json::Value) -> bool {
161    params
162        .get("include_diff_content")
163        .and_then(|v| v.as_bool())
164        .unwrap_or(false)
165}
166
167/// Compute compact diff counts (additions/deletions) without echoing any file
168/// content. This is the agent-facing default — the payload is constant-size
169/// regardless of how large the edited file is.
170pub fn compute_diff_counts(before: &str, after: &str) -> serde_json::Value {
171    use similar::ChangeTag;
172
173    let diff = similar::TextDiff::from_lines(before, after);
174    let mut additions = 0usize;
175    let mut deletions = 0usize;
176    for change in diff.iter_all_changes() {
177        match change.tag() {
178            ChangeTag::Insert => additions += 1,
179            ChangeTag::Delete => deletions += 1,
180            ChangeTag::Equal => {}
181        }
182    }
183    serde_json::json!({
184        "additions": additions,
185        "deletions": deletions,
186    })
187}
188
189/// Pick the right diff shape for a response based on request flags.
190///
191/// Default (`include_diff`): compact counts only — constant-size payload that
192/// never floods agent context. Full before/after content is returned only when
193/// the caller explicitly opts in with `include_diff_content` (UI metadata path).
194pub fn compute_diff_for_response(
195    params: &serde_json::Value,
196    before: &str,
197    after: &str,
198) -> serde_json::Value {
199    if wants_diff_content(params) {
200        compute_diff_info(before, after)
201    } else {
202        compute_diff_counts(before, after)
203    }
204}
205
206/// Compute diff info between before/after content for UI metadata.
207/// Returns a JSON value with before, after, additions, deletions.
208/// For files >512KB, omits full content and returns only counts.
209pub fn compute_diff_info(before: &str, after: &str) -> serde_json::Value {
210    use similar::ChangeTag;
211
212    let diff = similar::TextDiff::from_lines(before, after);
213    let mut additions = 0usize;
214    let mut deletions = 0usize;
215    for change in diff.iter_all_changes() {
216        match change.tag() {
217            ChangeTag::Insert => additions += 1,
218            ChangeTag::Delete => deletions += 1,
219            ChangeTag::Equal => {}
220        }
221    }
222
223    // For large files, skip sending full content to avoid bloating JSON
224    let size_limit = 512 * 1024; // 512KB
225    if before.len() > size_limit || after.len() > size_limit {
226        serde_json::json!({
227            "additions": additions,
228            "deletions": deletions,
229            "truncated": true,
230        })
231    } else {
232        serde_json::json!({
233            "before": before,
234            "after": after,
235            "additions": additions,
236            "deletions": deletions,
237        })
238    }
239}
240/// Snapshot the file into the backup store before mutation, scoped to a session.
241///
242/// Returns `Ok(Some(backup_id))` if the file existed and was backed up,
243/// `Ok(None)` if the file doesn't exist (new file creation).
244///
245/// The `session` argument is the request-level session namespace (see
246/// [`crate::protocol::RawRequest::session`]). Snapshots created by one session
247/// are not visible from another, which is what keeps undo state isolated in
248/// a shared-bridge setup (issue #14).
249///
250/// Drops the RefCell borrow before returning (D029).
251pub fn auto_backup(
252    ctx: &AppContext,
253    session: &str,
254    path: &Path,
255    description: &str,
256    op_id: Option<&str>,
257) -> Result<Option<String>, AftError> {
258    if std::fs::symlink_metadata(path).is_err() {
259        return Ok(None);
260    }
261    let backup_id = {
262        let mut store = ctx.backup().borrow_mut();
263        store.snapshot_with_op(session, path, description, op_id)?
264    }; // borrow dropped here
265    Ok(Some(backup_id))
266}
267
268/// Result of the write → format → validate pipeline.
269///
270/// Returned by `write_format_validate` to give callers a single struct
271/// with all post-write signals for the response JSON.
272pub struct WriteResult {
273    /// Whether tree-sitter syntax validation passed. `None` if unsupported language.
274    pub syntax_valid: Option<bool>,
275    /// Whether the file was auto-formatted.
276    pub formatted: bool,
277    /// Why formatting was skipped, if it was. Values: "unsupported_language",
278    /// "no_formatter_configured", "formatter_not_installed", "formatter_excluded_path",
279    /// "timeout", "error".
280    pub format_skipped_reason: Option<String>,
281    /// Whether full validation was requested (controls whether validation_errors is included in response).
282    pub validate_requested: bool,
283    /// Structured type-checker errors (only populated when validate:"full" is requested).
284    pub validation_errors: Vec<format::ValidationError>,
285    /// Why validation was skipped, if it was. Values: "unsupported_language",
286    /// "no_checker_configured", "checker_not_installed", "timeout", "error".
287    pub validate_skipped_reason: Option<String>,
288    /// True when the write+format+validate pipeline detected post-write
289    /// invalid syntax against a previously-valid file and restored the
290    /// pre-write content. The on-disk file is the original; `syntax_valid`
291    /// reports the would-have-been-written status (Some(false)).
292    pub rolled_back: bool,
293    /// Per-edit LSP diagnostics outcome (v0.17.3). Carries the verified-fresh
294    /// diagnostics PLUS per-server status (pending/exited) so the response
295    /// can report `complete: bool` honestly.
296    ///
297    /// `None` means the caller didn't request diagnostics OR the request
298    /// was a fire-and-forget notify (no wait). `Some(outcome)` always
299    /// reports diagnostics from servers that proved freshness against the
300    /// post-edit document version.
301    pub lsp_outcome: Option<crate::lsp::manager::PostEditWaitOutcome>,
302}
303
304/// Render structured validation errors as a compact `line N: message` list for
305/// an error message. Used by the refactor handlers when a write was rolled back.
306pub fn format_validation_errors(errors: &[format::ValidationError]) -> String {
307    errors
308        .iter()
309        .map(|e| format!("line {}: {}", e.line, e.message))
310        .collect::<Vec<_>>()
311        .join("; ")
312}
313
314impl WriteResult {
315    /// Append LSP diagnostics + per-server status to a response JSON
316    /// object.
317    ///
318    /// v0.17.3 honest-reporting contract: when diagnostics were requested
319    /// (`lsp_outcome.is_some()`), this ALWAYS emits `lsp_diagnostics: [...]`
320    /// (even if empty) plus `lsp_complete: bool`, `lsp_pending_servers`,
321    /// and `lsp_exited_servers`. Empty `lsp_diagnostics` no longer means
322    /// "the field disappeared" — it means "we waited and got an explicit
323    /// fresh-but-clean result, OR every expected server is in the pending/
324    /// exited list (check `lsp_complete`)."
325    ///
326    /// When diagnostics were NOT requested (`lsp_outcome.is_none()`),
327    /// nothing is added — keeps the no-LSP edit path's response shape
328    /// unchanged.
329    pub fn append_lsp_diagnostics_to(&self, result: &mut serde_json::Value) {
330        result["rolled_back"] = serde_json::json!(self.rolled_back);
331
332        let Some(outcome) = self.lsp_outcome.as_ref() else {
333            return;
334        };
335
336        result["lsp_diagnostics"] = serde_json::json!(outcome
337            .diagnostics
338            .iter()
339            .map(|d| {
340                serde_json::json!({
341                    "file": d.file.display().to_string(),
342                    "line": d.line,
343                    "column": d.column,
344                    "end_line": d.end_line,
345                    "end_column": d.end_column,
346                    "severity": d.severity.as_str(),
347                    "message": d.message,
348                    "code": d.code,
349                    "source": d.source,
350                })
351            })
352            .collect::<Vec<_>>());
353
354        result["lsp_complete"] = serde_json::Value::Bool(outcome.complete());
355
356        if !outcome.pending_servers.is_empty() {
357            result["lsp_pending_servers"] = serde_json::json!(outcome
358                .pending_servers
359                .iter()
360                .map(|key| key.kind.id_str().to_string())
361                .collect::<Vec<_>>());
362        }
363        if !outcome.exited_servers.is_empty() {
364            result["lsp_exited_servers"] = serde_json::json!(outcome
365                .exited_servers
366                .iter()
367                .map(|key| key.kind.id_str().to_string())
368                .collect::<Vec<_>>());
369        }
370    }
371}
372
373/// Write content to disk, auto-format, then validate syntax.
374///
375/// This is the shared tail for all mutation commands. The pipeline order is:
376/// 1. `fs::write` — persist content
377/// 2. `auto_format` — run the project formatter (reads the written file, writes back)
378/// 3. `validate_syntax` — parse the (potentially formatted) file
379/// 4. `validate_full` — run type checker if requested by params or config
380///
381/// The `params` argument carries the original request parameters. When it
382/// contains `"validate": "full"`, or config sets `validate_on_edit: "full"`,
383/// the project's type checker is invoked after syntax validation and the
384/// results are included in `WriteResult`.
385pub fn write_format_validate(
386    path: &Path,
387    content: &str,
388    config: &Config,
389    params: &serde_json::Value,
390) -> Result<WriteResult, AftError> {
391    let pre_write_content = if path.exists() {
392        std::fs::read_to_string(path).ok()
393    } else {
394        None
395    };
396    // Existing clean files are protected from invalid mutations. New files have
397    // no safe prior content to restore, so their pre-write validity remains None
398    // and invalid syntax is reported without rollback.
399    let was_syntax_valid = if pre_write_content.is_some() {
400        match validate_syntax(path) {
401            Ok(valid) => valid,
402            Err(_) => None,
403        }
404    } else {
405        None
406    };
407
408    // Step 1: Write
409    std::fs::write(path, content).map_err(|e| AftError::InvalidRequest {
410        message: format!("failed to write file: {}", e),
411    })?;
412
413    // Step 2: Format (before validate so we validate the formatted content)
414    let (formatted, format_skipped_reason) = format::auto_format(path, config);
415
416    // Step 3: Validate syntax
417    let syntax_valid = match validate_syntax(path) {
418        Ok(sv) => sv,
419        Err(_) => None,
420    };
421    let rolled_back = if was_syntax_valid == Some(true) && syntax_valid == Some(false) {
422        if let Some(original) = pre_write_content.as_ref() {
423            std::fs::write(path, original).map_err(|e| AftError::InvalidRequest {
424                message: format!("failed to roll back invalid edit: {}", e),
425            })?;
426            true
427        } else {
428            false
429        }
430    } else {
431        false
432    };
433
434    // Step 4: Full validation (type checker) — only when requested
435    let param_validate = params.get("validate").and_then(|v| v.as_str());
436    let config_validate = config.validate_on_edit.as_deref();
437    // Explicit param overrides config. Valid values: "syntax" | "full" | "off".
438    let validate_mode = param_validate.or(config_validate).unwrap_or("off");
439    let validate_requested = validate_mode == "full";
440    let (validation_errors, validate_skipped_reason) = if validate_requested {
441        format::validate_full(path, config)
442    } else {
443        (Vec::new(), None)
444    };
445
446    Ok(WriteResult {
447        syntax_valid,
448        formatted,
449        format_skipped_reason,
450        validate_requested,
451        validation_errors,
452        validate_skipped_reason,
453        rolled_back,
454        lsp_outcome: None,
455    })
456}
457
#[cfg(test)]
mod tests {
    use super::*;

    // --- line_col_to_byte ---

    /// Check `line_col_to_byte` against `(line, col, expected_offset)` cases
    /// that all share one `source`.
    fn assert_offsets(source: &str, cases: &[(u32, u32, usize)]) {
        for &(line, col, expected) in cases {
            assert_eq!(
                line_col_to_byte(source, line, col),
                expected,
                "line {} col {} in {:?}",
                line,
                col,
                source
            );
        }
    }

    #[test]
    fn line_col_to_byte_empty_string() {
        assert_offsets("", &[(0, 0, 0)]);
    }

    #[test]
    fn line_col_to_byte_single_line() {
        // The last case points at the end of the line.
        assert_offsets("hello", &[(0, 0, 0), (0, 3, 3), (0, 5, 5)]);
    }

    #[test]
    fn line_col_to_byte_multi_line() {
        assert_offsets(
            "abc\ndef\nghi\n",
            &[
                // line 0: "abc" at bytes 0..3, newline at 3
                (0, 0, 0),
                (0, 2, 2),
                // line 1: "def" at bytes 4..7, newline at 7
                (1, 0, 4),
                (1, 3, 7),
                // line 2: "ghi" at bytes 8..11, newline at 11
                (2, 0, 8),
                (2, 2, 10),
            ],
        );
    }

    #[test]
    fn line_col_to_byte_last_line_no_trailing_newline() {
        // line 1: "def" at bytes 4..7, no trailing newline
        assert_offsets("abc\ndef", &[(1, 0, 4), (1, 3, 7)]);
    }

    #[test]
    fn line_col_to_byte_multi_byte_utf8() {
        // "é" is 2 bytes in UTF-8, so "café" is 5 bytes and "bar" starts at 6.
        assert_offsets("café\nbar", &[(0, 0, 0), (0, 5, 5), (1, 0, 6), (1, 2, 8)]);
    }

    #[test]
    fn line_col_to_byte_beyond_end() {
        // A line beyond the file returns source.len().
        let source = "abc";
        assert_offsets(source, &[(5, 0, source.len())]);
    }

    #[test]
    fn line_col_to_byte_col_clamped_to_line_length() {
        // col=10 on a 2-char line should clamp to 2
        assert_offsets("ab\ncd", &[(0, 10, 2)]);
    }

    #[test]
    fn line_col_to_byte_crlf() {
        assert_offsets(
            "abc\r\ndef\r\nghi\r\n",
            &[(0, 0, 0), (0, 10, 3), (1, 0, 5), (1, 3, 8), (2, 0, 10)],
        );
    }

    // --- replace_byte_range ---

    /// Run `replace_byte_range` on a range that is expected to be valid.
    fn replaced(source: &str, start: usize, end: usize, replacement: &str) -> String {
        replace_byte_range(source, start, end, replacement).unwrap()
    }

    #[test]
    fn replace_byte_range_basic() {
        assert_eq!(replaced("hello world", 6, 11, "rust"), "hello rust");
    }

    #[test]
    fn replace_byte_range_delete() {
        assert_eq!(replaced("hello world", 5, 11, ""), "hello");
    }

    #[test]
    fn replace_byte_range_insert_at_same_position() {
        assert_eq!(replaced("helloworld", 5, 5, " "), "hello world");
    }

    #[test]
    fn replace_byte_range_replace_entire_string() {
        let source = "old content";
        assert_eq!(replaced(source, 0, source.len(), "new content"), "new content");
    }
}
564}