dirge-agent 0.13.9

Minimalistic coding agent written in Rust, optimized for memory footprint and performance
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
#[cfg(feature = "lsp")]
use std::sync::Arc;

use rig::completion::ToolDefinition;
use rig::tool::Tool;

use crate::agent::agent_loop::tool_input_repair::with_contract_hint;
use crate::agent::tools::cache::ToolCache;
use crate::agent::tools::{AskSender, EditArgs, PermCheck, ToolError, require_and_resolve};
#[cfg(feature = "lsp")]
use crate::lsp::manager::LspManager;

/// The `edit` tool: replaces an occurrence of `old_text` in a file with
/// `new_text` (see the `Tool` implementation for the matching rules).
pub struct EditTool {
    /// Optional permission checker; passed to `require_and_resolve` before
    /// the target file is touched.
    pub permission: Option<PermCheck>,
    /// Optional ask channel; passed to `require_and_resolve` together with
    /// `permission`.
    pub ask_tx: Option<AskSender>,
    /// Optional session cache. When present, the edit is refused unless the
    /// cache reports the file as already read, and the cache is cleared
    /// (with the path re-marked as read) after a successful write. `None`
    /// skips the read-before-edit gate.
    cache: Option<ToolCache>,
    /// When set, the tool touches the edited file on the LSP server and
    /// appends any diagnostic block to its output. `None` reproduces the
    /// pre-LSP behaviour.
    #[cfg(feature = "lsp")]
    lsp_manager: Option<Arc<LspManager>>,
}

impl EditTool {
    #[allow(dead_code)]
    pub fn new(permission: Option<PermCheck>, ask_tx: Option<AskSender>) -> Self {
        EditTool {
            permission,
            ask_tx,
            cache: None,
            #[cfg(feature = "lsp")]
            lsp_manager: None,
        }
    }

    pub fn with_cache(
        permission: Option<PermCheck>,
        ask_tx: Option<AskSender>,
        cache: ToolCache,
        #[cfg(feature = "lsp")] lsp_manager: Option<Arc<LspManager>>,
    ) -> Self {
        EditTool {
            permission,
            ask_tx,
            cache: Some(cache),
            #[cfg(feature = "lsp")]
            lsp_manager,
        }
    }

    pub(crate) fn show_diff(
        path: &str,
        content: &str,
        byte_pos: usize,
        old_text: &str,
        new_text: &str,
    ) -> String {
        let lines: Vec<&str> = content.lines().collect();
        let old_line_count = old_text.lines().count();
        let new_line_count = new_text.lines().count();
        let ctx: usize = 3;

        let match_line = content[..byte_pos].matches('\n').count();
        let start = match_line.saturating_sub(ctx);
        let ctx_after_start = (match_line + old_line_count).min(lines.len());
        let ctx_after_end = (ctx_after_start + ctx).min(lines.len());

        let ctx_before = match_line - start;
        let ctx_after = ctx_after_end - ctx_after_start;

        let mut result = format!("\n--- a/{}\n+++ b/{}\n", path, path);
        result.push_str(&format!(
            "@@ -{old_start},{old_count} +{new_start},{new_count} @@\n",
            old_start = start + 1,
            old_count = ctx_before + old_line_count + ctx_after,
            new_start = start + 1,
            new_count = ctx_before + new_line_count + ctx_after,
        ));

        for i in start..match_line {
            if let Some(line) = lines.get(i) {
                result.push_str(&format!(" {}\n", line));
            }
        }
        for line in old_text.lines() {
            result.push_str(&format!("-{}\n", line));
        }
        for line in new_text.lines() {
            result.push_str(&format!("+{}\n", line));
        }
        for i in ctx_after_start..ctx_after_end {
            if let Some(line) = lines.get(i) {
                result.push_str(&format!(" {}\n", line));
            }
        }

        result
    }
}

impl Tool for EditTool {
    const NAME: &'static str = "edit";

    type Error = ToolError;
    type Args = EditArgs;
    type Output = String;

    async fn definition(&self, _prompt: String) -> ToolDefinition {
        ToolDefinition {
            name: "edit".to_string(),
            description: with_contract_hint(
                "edit",
                "Edit a file by replacing exact text. If old_text appears once, replaces it. If it appears multiple times and replace_all is false, returns all match locations with line numbers. Use replaceAll: true to replace every occurrence. Handles both LF and CRLF line endings.",
            ),
            parameters: serde_json::json!({
                "type": "object",
                "properties": {
                    "path": { "type": "string", "description": "The absolute path to the file to edit (must be absolute, not relative)" },
                    "old_text": { "type": "string", "description": "Exact text to find and replace" },
                    "new_text": { "type": "string", "description": "New text to replace with" },
                    "replace_all": { "type": "boolean", "description": "Replace all occurrences instead of just the first" }
                },
                "required": ["path", "old_text", "new_text"]
            }),
        }
    }

    async fn call(&self, args: EditArgs) -> Result<String, ToolError> {
        if args.old_text.is_empty() {
            return Err(ToolError::Msg(
                "old_text must not be empty. Provide the exact text to replace.".to_string(),
            ));
        }

        // Reject non-absolute paths immediately with a clear error
        // (shared guard; the schema requires an absolute path).
        // Audit H12: require absolute + pin file operations to the canonical
        // path the permission check resolved.
        let resolved_path = require_and_resolve(
            &self.permission,
            &self.ask_tx,
            "edit",
            &args.path,
            "the edit path",
        )
        .await?;

        // Read-before-edit gate (ported from vix session_read_gate.go): refuse
        // to edit a file the model hasn't read this session, so `old_text` is
        // matched against content the model actually saw — not hallucinated or
        // stale context. Only enforced when a session cache is present (the
        // read-tracker); skipped for cache-less constructions (tests).
        if let Some(ref cache) = self.cache
            && !cache.has_been_read(std::path::Path::new(&resolved_path))
        {
            return Err(ToolError::Msg(format!(
                "edit was blocked because \"{}\" has not been read in this session yet. \
                 Call read on this path first so your change is based on the current \
                 on-disk contents.",
                args.path
            )));
        }

        // Pre-check size before reading. The edit tool isn't meant
        // for huge generated artifacts; cap at 100 MiB so an LLM
        // pointing it at a gigabyte log file fails fast rather
        // than OOM-ing the process. Matches the apply_patch cap.
        const MAX_EDIT_BYTES: u64 = 100 * 1024 * 1024;
        if let Ok(meta) = tokio::fs::metadata(&resolved_path).await
            && meta.len() > MAX_EDIT_BYTES
        {
            return Err(ToolError::Msg(format!(
                "file too large for edit: {} bytes (cap {} bytes); use bash with sed/awk for huge files",
                meta.len(),
                MAX_EDIT_BYTES,
            )));
        }
        let bytes = tokio::fs::read(&resolved_path).await?;
        let has_crlf = bytes.windows(2).any(|w| w == b"\r\n");
        let content = String::from_utf8_lossy(&bytes).replace("\r\n", "\n");
        let normalized_old = args.old_text.replace("\r\n", "\n");

        // B3-9 (audit fix): replacer cascade. Previously dirge did
        // a single exact-substring match and bailed with
        // "old_text not found" on any whitespace, indent, or
        // trailing-space drift. opencode's edit.ts:222-432 has a
        // 5-step cascade; pi's edit-diff.ts:91-132 has
        // fuzzyFindText. We port the three highest-value steps
        // (LineTrimmed, WhitespaceNormalized, IndentationFlexible)
        // which together catch the ~95% of LLM whitespace drift
        // failures. Each fallback is logged so the user sees
        // we matched with tolerance, not exactness.
        let (match_positions, fallback_used): (Vec<(usize, usize)>, Option<&'static str>) = {
            // Step 1: simple exact match (current behaviour).
            let exact: Vec<(usize, usize)> = content
                .match_indices(&normalized_old)
                .map(|(i, _)| (i, i + normalized_old.len()))
                .collect();
            if !exact.is_empty() {
                (exact, None)
            } else if let Some(matches) = find_line_trimmed_matches(&content, &normalized_old)
                && !matches.is_empty()
            {
                (matches, Some("line-trimmed"))
            } else if let Some(matches) =
                find_whitespace_normalized_matches(&content, &normalized_old)
                && !matches.is_empty()
            {
                (matches, Some("whitespace-normalized"))
            } else if let Some(matches) =
                find_indentation_flexible_matches(&content, &normalized_old)
                && !matches.is_empty()
            {
                (matches, Some("indentation-flexible"))
            } else {
                (Vec::new(), None)
            }
        };

        if match_positions.is_empty() {
            return Err(ToolError::Msg(format!(
                "old_text not found in '{}'.\nEnsure the exact text matches including whitespace and line endings. \
                Tried exact match, line-trimmed match, whitespace-normalized match, and indentation-flexible match.",
                args.path
            )));
        }

        // dirge-nj6d: the fuzzy fallback matchers can return OVERLAPPING
        // byte ranges (e.g. the whitespace-normalized matcher tries block
        // sizes up to +5, so two different start lines can cover the same
        // region). Splicing overlapping ranges — even in reverse — corrupts
        // the buffer or panics at a non-char boundary inside
        // `replace_range`. Keep only a disjoint set so every downstream
        // consumer (ambiguity count + replace_all splice) is safe.
        let match_ranges: Vec<(usize, usize)> = keep_disjoint_ranges(match_positions);
        // Reduce to start positions for backwards compat with the
        // downstream ambiguity-reporting and replacement logic.
        let match_positions: Vec<usize> = match_ranges.iter().map(|(s, _)| *s).collect();

        let do_replace_all = args.replace_all.unwrap_or(false);

        if match_positions.len() > 1 && !do_replace_all {
            let line_starts: Vec<usize> = std::iter::once(0)
                .chain(content.match_indices('\n').map(|(i, _)| i + 1))
                .collect();

            // Cap the per-match preview list so a pattern matching
            // thousands of lines doesn't return a thousand-line error
            // blob to the LLM — which would blow the agent's context
            // and crowd out the actual narrative. Show the first
            // MAX_AMBIGUOUS_MATCHES, then a single "...and N more"
            // line. 20 is enough to disambiguate any realistic case
            // (functions named identically, repeated string lits) while
            // keeping the error under a few KB.
            const MAX_AMBIGUOUS_MATCHES: usize = 20;
            let total_matches = match_positions.len();
            let preview_positions: &[usize] =
                &match_positions[..total_matches.min(MAX_AMBIGUOUS_MATCHES)];

            let mut match_info = Vec::with_capacity(preview_positions.len() + 1);
            for &byte_idx in preview_positions {
                let line_num = match line_starts.binary_search(&byte_idx) {
                    Ok(i) => i + 1,
                    Err(i) => i,
                };
                let line_start = line_starts.get(line_num - 1).copied().unwrap_or(0);
                let line_end = content[line_start..]
                    .find('\n')
                    .map(|e| line_start + e)
                    .unwrap_or(content.len());
                let line_text = &content[line_start..line_end];
                let truncated: String = line_text.chars().take(100).collect();
                match_info.push(format!("  Line {}: {}", line_num, truncated));
            }
            if total_matches > MAX_AMBIGUOUS_MATCHES {
                let remaining = total_matches - MAX_AMBIGUOUS_MATCHES;
                match_info.push(format!(
                    "  ... and {} more match{}",
                    remaining,
                    if remaining == 1 { "" } else { "es" },
                ));
            }

            return Err(ToolError::Msg(format!(
                "old_text matched {} times in {}:\n{}\n\nUse replace_all: true to replace all occurrences, or provide more surrounding context in old_text to narrow the match.",
                total_matches,
                args.path,
                match_info.join("\n"),
            )));
        }

        let byte_pos = match_positions[0];
        // B3-9: when the cascade fired, the matched substring may
        // differ from normalized_old (different whitespace/indent).
        // Replace by exact byte range instead of string.replace
        // (which would re-search normalized_old and not find it).
        let new_content = if do_replace_all {
            // For replace_all we splice every range in reverse
            // order so earlier offsets stay valid.
            let mut out = content.clone();
            let mut ranges = match_ranges.clone();
            ranges.sort_by_key(|r| std::cmp::Reverse(r.0));
            for (start, end) in ranges {
                out.replace_range(start..end, &args.new_text);
            }
            out
        } else {
            let (start, end) = match_ranges[0];
            let mut out = content.clone();
            out.replace_range(start..end, &args.new_text);
            out
        };

        // B3-9: surface the fallback used so the LLM sees we
        // didn't match exactly — helps it correct future calls.
        let fallback_note = match fallback_used {
            Some(label) => format!(
                " (matched via {} fallback — exact text not found; whitespace/indent tolerated)",
                label
            ),
            None => String::new(),
        };

        let candidate = if has_crlf {
            new_content.replace('\n', "\r\n")
        } else {
            new_content
        };

        // Phase-2 tree-sitter validation: refuse to write
        // syntactically-broken edits so the model sees the error
        // in the same turn. dirge-p5fu: a purely unclosed-delimiter
        // imbalance is mechanically closed (parity with the JSON
        // truncation repair) and reported, rather than bounced back.
        // See docs/AGENTIC_LOOP_PLAN.md §2.
        let (output, syntax_note) =
            crate::agent::tools::syntax_gate(std::path::Path::new(&resolved_path), &candidate)
                .map_err(ToolError::Msg)?;
        // Captured before `append_repair_note` consumes `syntax_note` below;
        // gates the repair-path LSP rollback (dirge-p1ws).
        #[cfg(feature = "lsp")]
        let was_repaired = syntax_note.is_some();
        #[cfg(feature = "lsp")]
        let write_at = std::time::Instant::now();
        // Snapshot pre-edit content for /rewind before mutating. Reuse
        // the bytes already read above rather than re-reading from disk.
        crate::agent::tools::snapshots::capture_bytes(std::path::Path::new(&resolved_path), &bytes);
        // Atomic write so a mid-write crash leaves the previous
        // content intact rather than a truncated half-write.
        crate::fs_atomic::atomic_write(std::path::Path::new(&resolved_path), output.as_bytes())
            .await?;
        crate::agent::tools::modified::mark_modified(std::path::Path::new(&resolved_path));
        // File mutated → invalidate cached reads/greps/listings for this turn.
        // A successful edit leaves the model with accurate on-disk knowledge,
        // so keep the path marked read for subsequent edits (clear() preserves
        // the read-set).
        if let Some(ref cache) = self.cache {
            cache.clear();
            cache.mark_read(std::path::Path::new(&resolved_path));
        }

        // Path lives in the chamber banner (`╭─ EDIT ─ "<path>" ─╮`),
        // so don't repeat it. The diff block below is the meat;
        // this first line is a compact summary.
        let mut result = if do_replace_all {
            format!(
                "Applied edit ({} replacements){}",
                match_positions.len(),
                fallback_note
            )
        } else {
            format!("Applied edit{}", fallback_note)
        };
        crate::agent::tools::append_repair_note(&mut result, syntax_note);
        // Mention the line delta when adding/removing lines so the
        // LLM can confirm the size of change without re-reading
        // the diff block. For replace_all the per-replacement
        // delta multiplies by the number of replacements — the
        // user wants the FILE delta, not the per-instance delta.
        let old_lines = args.old_text.lines().count();
        let new_lines = args.new_text.lines().count();
        let per_replacement_delta = new_lines as i64 - old_lines as i64;
        let total_delta = if do_replace_all {
            per_replacement_delta * (match_positions.len() as i64)
        } else {
            per_replacement_delta
        };
        if total_delta != 0 {
            result.push_str(&format!(" ({:+} lines)", total_delta));
        }

        // Always emit a diff. The earlier 20-line cap was meant to
        // keep LLM context lean, but in practice it silently hid
        // useful diffs for any non-trivial edit. Bump to 200 lines
        // per side which covers the vast majority of real edits;
        // edits larger than that are likely refactors where the
        // "edit + diff" pattern isn't the right tool anyway.
        // `old_lines` / `new_lines` already computed above for the
        // delta summary.
        if old_lines <= 200 && new_lines <= 200 {
            result.push_str(&Self::show_diff(
                &args.path,
                &content,
                byte_pos,
                &args.old_text,
                &args.new_text,
            ));
        }

        #[cfg(feature = "lsp")]
        {
            let path = std::path::Path::new(&resolved_path);
            // A repaired edit is verified by the language server; if the close
            // produced errors, the file is rolled back to its pre-edit bytes
            // and the model gets the diagnostics (dirge-p1ws). A clean edit
            // keeps today's surface-don't-block behavior.
            if was_repaired {
                match crate::agent::tools::write::verify_repaired_write_or_rollback(
                    self.lsp_manager.as_ref(),
                    path,
                    Some(bytes.clone()),
                    // Edit always targets an existing file; `before` is always
                    // Some, so this flag is moot, but keep it honest.
                    false,
                    write_at,
                )
                .await
                {
                    Ok(block) => result.push_str(&block),
                    Err(feedback) => {
                        if let Some(ref cache) = self.cache {
                            cache.clear();
                        }
                        return Err(ToolError::Msg(feedback));
                    }
                }
            } else {
                result.push_str(
                    &crate::agent::tools::write::append_lsp_block(
                        self.lsp_manager.as_ref(),
                        path,
                        write_at,
                    )
                    .await,
                );
            }
        }
        Ok(result)
    }
}

// B3-9 — replacer cascade helpers. Port of opencode's edit.ts:240-540
// fallback ladder. Each helper returns `Option<Vec<(start_byte, end_byte)>>`:
// the byte ranges in `content` that match `find` under the helper's
// normalization. `None` and an empty Vec both mean "no matches". The
// call site above tries the helpers in priority order.

/// dirge-nj6d: filters `(start, end)` byte ranges down to a
/// non-overlapping subset, returned in ascending start order.
///
/// After a stable sort by start offset, a range survives only when it
/// begins at or after the end of the most recently kept range; anything
/// that would overlap a kept range is discarded. Within a cluster of
/// overlapping ranges the earliest-starting one therefore wins (ties on
/// start keep the one that came first in the input). Callers rely on this
/// so that splicing replacements into the buffer never touches the same
/// bytes twice.
fn keep_disjoint_ranges(mut ranges: Vec<(usize, usize)>) -> Vec<(usize, usize)> {
    ranges.sort_by_key(|&(start, _)| start);
    // End offset of the last range we decided to keep, if any.
    let mut frontier: Option<usize> = None;
    ranges.retain(|&(start, end)| {
        let keep = frontier.map_or(true, |limit| start >= limit);
        if keep {
            frontier = Some(end);
        }
        keep
    });
    ranges
}

/// Line-trimmed match. Finds every run of N consecutive lines in
/// `content` (N = number of `\n`-separated lines in `find`) where each
/// line's `.trim()` equals the corresponding `find` line's `.trim()`.
/// Catches the common case of "LLM emitted the right content but with
/// slightly off indent or trailing whitespace."
/// Mirrors opencode `LineTrimmedReplacer` (edit.ts:244).
///
/// Returns the `(start_byte, end_byte)` range of each matching run. A
/// range starts at the first byte of the run's first line and ends after
/// the last byte of its last line (the trailing `\n`, if any, is not
/// included). `Some(vec![])` means nothing matched — including when
/// `find` has more lines than `content`, which used to index past the end
/// of the line slice and panic.
fn find_line_trimmed_matches(content: &str, find: &str) -> Option<Vec<(usize, usize)>> {
    let content_lines: Vec<&str> = content.split('\n').collect();
    let find_lines: Vec<&str> = find.split('\n').collect();
    let window = find_lines.len();
    // `split` always yields at least one item, so this is purely
    // defensive; it also keeps `windows(0)` (which panics) unreachable.
    if window == 0 {
        return None;
    }

    let mut out = Vec::new();
    // Byte offset of the first line of the current window.
    let mut start_byte = 0usize;
    // `windows` yields nothing when `window > content_lines.len()`, so a
    // `find` longer than the file simply produces no matches.
    for block in content_lines.windows(window) {
        let all_trim_match = block
            .iter()
            .zip(find_lines.iter())
            .all(|(a, b)| a.trim() == b.trim());
        if all_trim_match {
            // Line bytes plus the (window - 1) interior newlines.
            let block_len = block.iter().map(|line| line.len()).sum::<usize>() + (window - 1);
            out.push((start_byte, start_byte + block_len));
        }
        // Advance past this window's first line and its `\n` separator.
        start_byte += block[0].len() + 1;
    }
    Some(out)
}

/// Whitespace-normalized match. Both `find` and each candidate block of
/// `content` lines are reduced to their whitespace-separated tokens joined
/// by single spaces, and a block matches when the reduced forms are equal.
/// Mirrors opencode `WhitespaceNormalizedReplacer` (edit.ts:419). Catches
/// "LLM tab vs spaces" / "double-spaces" drift, and re-wrapped code.
///
/// Returns `None` when `find` contains no non-whitespace characters.
/// Otherwise returns the matching `(start_byte, end_byte)` ranges sorted
/// by start offset, at most one per starting line (the shortest). Ranges
/// from different starting lines may overlap.
fn find_whitespace_normalized_matches(content: &str, find: &str) -> Option<Vec<(usize, usize)>> {
    use std::collections::HashMap;

    /// Collapses every whitespace run to one space and trims both ends.
    fn squash(text: &str) -> String {
        text.split_whitespace().collect::<Vec<_>>().join(" ")
    }

    let wanted = squash(find);
    if wanted.is_empty() {
        return None;
    }

    let haystack: Vec<&str> = content.split('\n').collect();
    let needle_line_count = find.split('\n').count();

    // offsets[i] is the byte offset at which line i of `content` begins.
    let offsets: Vec<usize> = haystack
        .iter()
        .scan(0usize, |next, line| {
            let here = *next;
            *next += line.len() + 1; // line bytes plus the `\n` separator
            Some(here)
        })
        .collect();

    // Candidate blocks range from the line count of `find` up to five
    // lines more, so a `find` written on one line can still match source
    // that spreads the same tokens over several lines. The bound keeps the
    // search from going quadratic. Several block sizes can match from the
    // same start (trailing blank lines squash to nothing); only the
    // shortest is kept.
    let largest = (needle_line_count + 5).min(haystack.len());
    let mut shortest_end_at: HashMap<usize, usize> = HashMap::new();
    for span in needle_line_count..=largest {
        if span == 0 {
            continue;
        }
        for (idx, window) in haystack.windows(span).enumerate() {
            if squash(&window.join("\n")) != wanted {
                continue;
            }
            let begin = offsets[idx];
            let interior_newlines = span - 1;
            let finish =
                begin + window.iter().map(|line| line.len()).sum::<usize>() + interior_newlines;
            shortest_end_at
                .entry(begin)
                .and_modify(|end| {
                    if finish < *end {
                        *end = finish;
                    }
                })
                .or_insert(finish);
        }
    }

    let mut hits: Vec<(usize, usize)> = shortest_end_at.into_iter().collect();
    hits.sort_by_key(|&(begin, _)| begin);
    Some(hits)
}

/// Indentation-flexible match. Strip the minimum common leading
/// whitespace from both find and each candidate block, then
/// compare. Mirrors opencode `IndentationFlexibleReplacer`
/// (edit.ts:463). Catches the case where the LLM emitted code
/// with a different baseline indent than the source.
///
/// Candidates are every run of consecutive `content` lines with the
/// same line count as `find` (both split on `'\n'`). Each hit is
/// reported as an end-exclusive `(start_byte, end_byte)` range into
/// `content` that includes the block's original indentation but not
/// the newline after its last line. Ranges are emitted in ascending
/// start order.
///
/// Always returns `Some`; the vector is empty when nothing matches,
/// including when `find` has more lines than `content`.
fn find_indentation_flexible_matches(content: &str, find: &str) -> Option<Vec<(usize, usize)>> {
    /// Remove the smallest leading-whitespace prefix shared by all
    /// non-blank lines of `s`. Blank (whitespace-only) lines are kept
    /// verbatim and do not influence the computed indent.
    fn strip_min_indent(s: &str) -> String {
        let lines: Vec<&str> = s.split('\n').collect();
        let min_indent = lines
            .iter()
            .filter(|l| !l.trim().is_empty())
            .map(|l| l.chars().take_while(|c| c.is_whitespace()).count())
            .min()
            .unwrap_or(0);
        lines
            .iter()
            .map(|l| {
                if l.trim().is_empty() {
                    String::from(*l)
                } else {
                    // The indent is counted in chars and Unicode
                    // whitespace can be multi-byte, so skip by char
                    // instead of slicing at a byte offset.
                    l.chars().skip(min_indent).collect()
                }
            })
            .collect::<Vec<String>>()
            .join("\n")
    }

    let norm_find = strip_min_indent(find);
    // `split` always yields at least one item, so the window size is
    // never zero (which `windows` would reject with a panic).
    let block_len = find.split('\n').count();
    let content_lines: Vec<&str> = content.split('\n').collect();

    let mut out = Vec::new();
    // Byte offset in `content` of the current window's first line.
    let mut start_byte = 0usize;
    // `windows` yields nothing when `find` has more lines than
    // `content`, so an over-long `find` is a clean "no match" instead
    // of an out-of-bounds slice.
    for block in content_lines.windows(block_len) {
        let block_text = block.join("\n");
        if strip_min_indent(&block_text) == norm_find {
            // The joined text has exactly the byte length of the
            // matched span: every line plus one '\n' between lines.
            out.push((start_byte, start_byte + block_text.len()));
        }
        // Step past this window's first line and its newline.
        start_byte += block[0].len() + 1;
    }
    Some(out)
}

#[cfg(test)]
mod fuzzy_tests {
    use super::*;

    /// A find block emitted without leading indent still locates the
    /// indented source lines; the reported span keeps the source indent.
    #[test]
    fn line_trimmed_matches_indent_drift() {
        let source = "fn foo() {\n    let x = 1;\n    let y = 2;\n}\n";
        let needle = "let x = 1;\nlet y = 2;";
        let hits = find_line_trimmed_matches(source, needle).unwrap();
        assert_eq!(hits.len(), 1);
        let (start, end) = hits[0];
        assert_eq!(&source[start..end], "    let x = 1;\n    let y = 2;");
    }

    /// Lines whose trimmed text differs must not be reported as a match.
    #[test]
    fn line_trimmed_no_match_when_content_differs() {
        let source = "let x = 1;\nlet y = 2;\n";
        let needle = "let x = 1;\nlet z = 3;";
        let hits = find_line_trimmed_matches(source, needle).unwrap();
        assert!(hits.is_empty());
    }

    /// A one-line find matches the same tokens spread over three source
    /// lines with doubled spaces and a tab.
    #[test]
    fn whitespace_normalized_matches_tab_vs_spaces() {
        let source = "fn  foo()  {\n\tlet x = 1;\n}\n";
        let needle = "fn foo() { let x = 1; }";
        let hits = find_whitespace_normalized_matches(source, needle).unwrap();
        // Exactly one block (the three joined lines) is expected.
        assert_eq!(hits.len(), 1);
    }

    /// The inner block is found even though the find text carries no
    /// baseline indent while the source is indented by eight spaces.
    #[test]
    fn indentation_flexible_matches_re_indented_block() {
        let source = "fn foo() {\n        let x = 1;\n        let y = 2;\n}\n";
        let needle = "let x = 1;\nlet y = 2;";
        let hits = find_indentation_flexible_matches(source, needle).unwrap();
        assert_eq!(hits.len(), 1);
        let (start, end) = hits[0];
        assert_eq!(
            &source[start..end],
            "        let x = 1;\n        let y = 2;"
        );
    }

    // ── dirge-nj6d: overlapping-range dedup for replace_all ──

    /// Within each overlapping cluster the earliest-starting range
    /// survives; ranges that merely touch are retained.
    #[test]
    fn keep_disjoint_drops_overlaps_keeps_earliest() {
        let kept = keep_disjoint_ranges(vec![(0, 10), (5, 15), (12, 20), (20, 25)]);
        assert_eq!(kept, vec![(0, 10), (12, 20), (20, 25)]);
    }

    /// Input order does not matter: once sorted, (5,8) and (20,25) both
    /// lie inside (0,30) and are dropped.
    #[test]
    fn keep_disjoint_sorts_unsorted_input() {
        let kept = keep_disjoint_ranges(vec![(20, 25), (0, 30), (5, 8)]);
        assert_eq!(kept, vec![(0, 30)]);
    }

    /// Ranges are end-exclusive, so (0,5) and (5,10) touch without
    /// overlapping and both stay.
    #[test]
    fn keep_disjoint_adjacent_ranges_are_kept() {
        let kept = keep_disjoint_ranges(vec![(0, 5), (5, 10)]);
        assert_eq!(kept, vec![(0, 5), (5, 10)]);
    }

    /// Empty input yields empty output.
    #[test]
    fn keep_disjoint_empty() {
        let kept = keep_disjoint_ranges(Vec::new());
        assert!(kept.is_empty());
    }

    /// Mirrors the call-site reverse-order `replace_range` splice over a
    /// set that originated as OVERLAPPING matcher output. Without
    /// `keep_disjoint_ranges` this corrupts the buffer (and can panic at a
    /// non-char boundary); with it, the splice is safe and correct.
    #[test]
    fn replace_all_reverse_splice_over_deduped_ranges_is_safe() {
        // (1,3) and (1,2) overlap; (3,4) is disjoint.
        let deduped = keep_disjoint_ranges(vec![(1, 3), (1, 2), (3, 4)]);
        assert_eq!(deduped, vec![(1, 3), (3, 4)]);

        let mut spliced = String::from("aXbXc");
        // Splice back-to-front so earlier offsets stay valid.
        deduped
            .into_iter()
            .rev()
            .for_each(|(start, end)| spliced.replace_range(start..end, "_"));
        assert_eq!(spliced, "a__c");
    }
}

#[cfg(test)]
mod read_gate_tests {
    use super::*;
    use crate::agent::tools::{cache::ToolCache, EditArgs};

    /// Build an `EditTool` that has only a cache attached: no
    /// permission handle and no ask channel.
    fn tool_with_cache(cache: ToolCache) -> EditTool {
        EditTool {
            cache: Some(cache),
            permission: None,
            ask_tx: None,
            #[cfg(feature = "lsp")]
            lsp_manager: None,
        }
    }

    /// vix read-before-edit gate: an edit to a file that was never
    /// marked as read is rejected; after the cache is marked, the same
    /// edit goes through and the file on disk changes.
    #[tokio::test]
    async fn edit_blocked_until_file_is_read() {
        let scratch = std::env::temp_dir().join(format!("dirge-edit-gate-{}", std::process::id()));
        std::fs::create_dir_all(&scratch).unwrap();
        let target = scratch.join("g.txt");
        std::fs::write(&target, "hello\nworld\n").unwrap();
        let target_str = target.to_string_lossy().into_owned();
        // The gate keys on `check_perm_path_resolve`'s output (not raw
        // canonicalize()), so resolve the path the same way the tool does.
        let gate_key =
            crate::agent::tools::check_perm_path_resolve(&None, &None, "read", &target_str)
                .await
                .unwrap();

        let cache = ToolCache::new();
        let tool = tool_with_cache(cache.clone());
        let make_args = || EditArgs {
            path: target_str.clone(),
            old_text: String::from("world"),
            new_text: String::from("WORLD"),
            replace_all: None,
        };

        // Phase 1: nothing marked as read yet, so the edit is refused.
        let err = tool
            .call(make_args())
            .await
            .expect_err("edit must be gated before read");
        assert!(
            err.to_string().contains("has not been read"),
            "gate message; got {err}"
        );

        // Phase 2: mark the file read (as ReadTool would), then retry.
        cache.mark_read(std::path::Path::new(&gate_key));
        let ok = tool.call(make_args()).await;
        assert!(ok.is_ok(), "edit should succeed after read; got {ok:?}");
        let on_disk = std::fs::read_to_string(&target).unwrap();
        assert_eq!(on_disk, "hello\nWORLD\n");

        // Best-effort cleanup; a failure here is not a test failure.
        let _ = std::fs::remove_dir_all(&scratch);
    }

    /// No cache (e.g. cache-less construction) ⇒ no gate (can't track reads).
    #[tokio::test]
    async fn no_cache_means_no_gate() {
        let scratch =
            std::env::temp_dir().join(format!("dirge-edit-nogate-{}", std::process::id()));
        std::fs::create_dir_all(&scratch).unwrap();
        let target = scratch.join("g.txt");
        std::fs::write(&target, "a\nb\n").unwrap();

        let request = EditArgs {
            path: target.to_string_lossy().into_owned(),
            old_text: String::from("b"),
            new_text: String::from("B"),
            replace_all: None,
        };
        let tool = EditTool::new(None, None);
        let ok = tool.call(request).await;
        assert!(ok.is_ok(), "no cache ⇒ ungated; got {ok:?}");

        // Best-effort cleanup; a failure here is not a test failure.
        let _ = std::fs::remove_dir_all(&scratch);
    }
}