coding-tools 0.8.7

Declarative, agent-friendly CLI tools behind one 'ct' command: search, view, verifiable edits, and framed command tests.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
// SPDX-License-Identifier: Apache-2.0
// Copyright 2026 Jonathan Shook

//! Line-anchored literal block matching, shared by `ct-search`, `ct-view`,
//! and `ct-edit`.
//!
//! A multi-line pattern matches as a *block*: a find block of K lines matches
//! K consecutive source lines exactly, byte-for-byte, leading and trailing
//! whitespace significant. Only the line *ending* is not significant — a
//! `\n`-split find block matches CRLF source, and [`edit_blocks`] preserves each
//! file's own endings (and a missing final newline) on write. When a block fails
//! to match, [`nearest_miss`] reports the best partial alignment — the candidate
//! with the longest matching prefix and the first diverging line — so the author
//! sees *why* the anchor missed (whitespace drift, a comment edit, an
//! already-applied change) without bisecting by hand.

/// Diagnostic describing how close an unmatched find block came to matching:
/// where the best candidate alignment starts and which block line disagreed
/// with the source first.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NearestMiss {
    /// Source line (1-based) at which the best candidate alignment begins.
    pub line: usize,
    /// Position (1-based, counted *within the block*) of the first line that
    /// diverged.
    pub first_diverging_line: usize,
    /// What the block expected on the diverging line.
    pub expected: String,
    /// What the source actually held there; empty when the block ran past the
    /// end of the file.
    pub found: String,
    /// Number of lines in the whole find block, so output can report
    /// "line N (of M)".
    pub block_len: usize,
}

impl NearestMiss {
    /// Suggest that the find payload carries a *stray blank line*.
    ///
    /// Yields `Some(note)` when the block line expected at the divergence is
    /// empty — the usual footprint of an editor's trailing newline or a
    /// hand-pasted blank line — and `None` for every other kind of mismatch.
    /// Block anchors taken from `file:` payloads have their trailing blank
    /// lines trimmed, so in practice the note matters mostly for inline/`text:`
    /// payloads and for empty lines in the interior of a block.
    pub fn blank_line_hint(&self) -> Option<String> {
        // Only a truly empty expected line qualifies; anything else is an
        // ordinary content mismatch and gets no hint.
        if !self.expected.is_empty() {
            return None;
        }
        let note = format!(
            "the find block's line {} (of {}) is empty — likely a stray blank or \
             trailing line in the payload; trim it, or pass the anchor via text:",
            self.first_diverging_line, self.block_len
        );
        Some(note)
    }
}

/// Locate each non-overlapping occurrence of `block` within `lines`, walking
/// from the top. The result holds the 0-based index where each match begins.
///
/// Lines are compared exactly, so any whitespace difference prevents a match.
/// After a hit the scan resumes just past it, which is what keeps matches from
/// overlapping. An empty `block`, or one longer than `lines`, matches nothing.
///
/// # Examples
///
/// ```
/// use coding_tools::block::find_starts;
///
/// let lines = ["a", "b", "c", "a", "b"];
/// let block = ["a".to_string(), "b".to_string()];
/// assert_eq!(find_starts(&lines, &block), vec![0, 3]);
/// ```
pub fn find_starts<S: AsRef<str>>(lines: &[S], block: &[String]) -> Vec<usize> {
    let width = block.len();
    let mut found = Vec::new();
    if width == 0 || width > lines.len() {
        return found;
    }
    // Highest index at which a window of `width` lines still fits.
    let last_start = lines.len() - width;
    let mut cursor = 0usize;
    while cursor <= last_start {
        let window = &lines[cursor..cursor + width];
        let hit = window
            .iter()
            .zip(block)
            .all(|(have, want)| have.as_ref() == want.as_str());
        if hit {
            found.push(cursor);
            // Jump over the whole match so the next one cannot overlap it.
            cursor += width;
        } else {
            cursor += 1;
        }
    }
    found
}

/// Decide whether a line is *blank* for the purposes of blank-run squeezing.
/// Both the empty line and a line made up solely of whitespace (such as
/// `"   "`) qualify — squeezing treats them alike on purpose.
fn is_blank(s: &str) -> bool {
    s.chars().all(char::is_whitespace)
}

/// Try to lay `block` over `lines` beginning at source index `start`, with
/// *blank runs squeezed*: each maximal run of blank block lines pairs with a
/// run of one or more blank source lines, whatever the two lengths are, while
/// every non-blank block line has to equal its source line byte-for-byte.
///
/// Returns `Ok(n)` with the number of source lines the alignment consumed, or
/// `Err((block_index, source_index))` naming the block line that diverged first
/// and how far into the source the alignment had reached. For a blank run the
/// reported block index is the first line of that run.
fn align_squeezed<S: AsRef<str>>(
    lines: &[S],
    block: &[String],
    start: usize,
) -> Result<usize, (usize, usize)> {
    let mut src = start; // cursor into the source
    let mut pos = 0usize; // cursor into the block
    while let Some(want) = block.get(pos) {
        if !is_blank(want) {
            // Ordinary line: must be present in the source and identical.
            match lines.get(src) {
                Some(have) if have.as_ref() == want.as_str() => {
                    pos += 1;
                    src += 1;
                }
                _ => return Err((pos, src)),
            }
            continue;
        }
        // Blank run: the source must offer at least one blank line here.
        let source_is_blank = lines.get(src).is_some_and(|l| is_blank(l.as_ref()));
        if !source_is_blank {
            return Err((pos, src));
        }
        // Swallow the whole run on both sides, regardless of relative length.
        pos += block[pos..]
            .iter()
            .take_while(|b| is_blank(b.as_str()))
            .count();
        src += lines[src..]
            .iter()
            .take_while(|l| is_blank(l.as_ref()))
            .count();
    }
    Ok(src - start)
}

/// Collect every non-overlapping *squeezed* match of `block` in `lines`, in
/// source order (the alignment rules are those of [`align_squeezed`]). Each
/// entry is a `(0-based start, source-line count)` span; the count exceeds the
/// block's length whenever the source carries a wider blank run than the
/// anchor does. An empty `block` produces no spans.
///
/// # Examples
///
/// ```
/// use coding_tools::block::find_spans_squeezed;
///
/// // The anchor's single blank line absorbs the source's two blank lines.
/// let lines = ["foo()", "", "", "bar()"];
/// let block = ["foo()".to_string(), String::new(), "bar()".to_string()];
/// assert_eq!(find_spans_squeezed(&lines, &block), vec![(0, 4)]);
/// ```
pub fn find_spans_squeezed<S: AsRef<str>>(lines: &[S], block: &[String]) -> Vec<(usize, usize)> {
    let mut spans = Vec::new();
    if block.is_empty() {
        return spans;
    }
    let mut at = 0usize;
    while at < lines.len() {
        let step = match align_squeezed(lines, block, at) {
            Ok(consumed) => {
                spans.push((at, consumed));
                // Resume after the match (always advancing) so spans never overlap.
                consumed.max(1)
            }
            Err(_) => 1,
        };
        at += step;
    }
    spans
}

/// Describe the closest partial alignment of an unmatched `block` in `lines`.
///
/// Every source line equal to the block's first line is a candidate start; the
/// winner is the one followed by the longest run of matching block lines, and
/// the earliest start wins a tie. If no source line equals the first block line
/// exactly, the first source line that equals it after trimming whitespace is
/// reported instead, so indentation drift — the most common anchor failure —
/// still gets a diagnosis.
///
/// Returns `None` when `block` or `lines` is empty, or when neither an exact
/// nor a trim-equal candidate for the first block line exists.
pub fn nearest_miss<S: AsRef<str>>(lines: &[S], block: &[String]) -> Option<NearestMiss> {
    let head = block.first()?;
    if lines.is_empty() {
        return None;
    }
    // Best candidate so far, as (start, matched prefix length).
    let mut best: Option<(usize, usize)> = None;
    for (start, line) in lines.iter().enumerate() {
        let text: &str = line.as_ref();
        if text != head.as_str() {
            continue;
        }
        let prefix = lines[start..]
            .iter()
            .zip(block)
            .take_while(|&(have, want)| have.as_ref() == want.as_str())
            .count();
        // Strictly longer only: on a tie the earlier start is kept.
        let improves = match best {
            Some((_, longest)) => prefix > longest,
            None => true,
        };
        if improves {
            best = Some((start, prefix));
        }
    }
    if let Some((start, prefix)) = best {
        // A prefix as long as the block would have been a match, so `prefix`
        // indexes the diverging block line; the source may already be exhausted.
        let found = match lines.get(start + prefix) {
            Some(l) => l.as_ref().to_string(),
            None => String::new(),
        };
        let expected = block.get(prefix).cloned().unwrap_or_default();
        return Some(NearestMiss {
            line: start + 1,
            first_diverging_line: prefix + 1,
            expected,
            found,
            block_len: block.len(),
        });
    }
    // Nothing anchors exactly: look for whitespace drift on the first line. A
    // blank first line would trim-match every blank source line, so skip it.
    let want = head.trim();
    if want.is_empty() {
        return None;
    }
    for (idx, line) in lines.iter().enumerate() {
        let text: &str = line.as_ref();
        if text.trim() == want {
            return Some(NearestMiss {
                line: idx + 1,
                first_diverging_line: 1,
                expected: head.clone(),
                found: text.to_string(),
                block_len: block.len(),
            });
        }
    }
    None
}

/// Run the miss diagnostic that corresponds to the chosen matcher: the
/// blank-run-squeezing variant when `squeeze` is set, plain [`nearest_miss`]
/// otherwise. This keeps the diagnostic in agreement with how the edit itself
/// tried to match.
pub fn nearest_miss_with<S: AsRef<str>>(
    lines: &[S],
    block: &[String],
    squeeze: bool,
) -> Option<NearestMiss> {
    match squeeze {
        true => nearest_miss_squeezed(lines, block),
        false => nearest_miss(lines, block),
    }
}

/// Squeeze-aware counterpart of the exact miss diagnostic.
///
/// A start *anchors* when its source line equals the block's first line — or,
/// if that first line is blank, when the source line is blank too. Among the
/// anchoring starts whose alignment fails, the one that got furthest into the
/// block before diverging is reported (the earliest start wins a tie), with
/// blank runs squeezed exactly as [`find_spans_squeezed`] does. When no start
/// qualifies, the first source line equal to the block's first line after
/// trimming whitespace is reported instead.
///
/// Returns `None` when `block` or `lines` is empty, or when no candidate of
/// either kind exists.
fn nearest_miss_squeezed<S: AsRef<str>>(lines: &[S], block: &[String]) -> Option<NearestMiss> {
    let head = block.first()?;
    if lines.is_empty() {
        return None;
    }
    let head_is_blank = is_blank(head);
    // Best candidate so far, as (start, diverging block index, source index reached).
    let mut best: Option<(usize, usize, usize)> = None;
    for (start, line) in lines.iter().enumerate() {
        let text: &str = line.as_ref();
        let anchors = if head_is_blank {
            is_blank(text)
        } else {
            text == head.as_str()
        };
        if !anchors {
            continue;
        }
        // Successful alignments are matches, not misses; only failures count.
        if let Err((diverged, reached)) = align_squeezed(lines, block, start) {
            let improves = match best {
                Some((_, furthest, _)) => diverged > furthest,
                None => true,
            };
            if improves {
                best = Some((start, diverged, reached));
            }
        }
    }
    if let Some((start, diverged, reached)) = best {
        // The alignment may have stopped past the end of the source.
        let found = match lines.get(reached) {
            Some(l) => l.as_ref().to_string(),
            None => String::new(),
        };
        let expected = block.get(diverged).cloned().unwrap_or_default();
        return Some(NearestMiss {
            line: start + 1,
            first_diverging_line: diverged + 1,
            expected,
            found,
            block_len: block.len(),
        });
    }
    // Fallback: whitespace drift on the first block line.
    let want = head.trim();
    if want.is_empty() {
        return None;
    }
    for (idx, line) in lines.iter().enumerate() {
        let text: &str = line.as_ref();
        if text.trim() == want {
            return Some(NearestMiss {
                line: idx + 1,
                first_diverging_line: 1,
                expected: head.clone(),
                found: text.to_string(),
                block_len: block.len(),
            });
        }
    }
    None
}

use crate::edit::Site;

/// Swap each non-overlapping occurrence of `block` in `content` for the
/// `replacement` lines, matching exactly (no blank-run squeezing). Bytes
/// outside the matched lines are carried over unchanged, including a missing
/// final newline. Passing an empty `replacement` removes the matched lines
/// altogether.
///
/// The result is `(new content, occurrence count, changed sites)`. In each
/// site, `line` is the 1-based line where the block started and
/// `before`/`after` are the old and new lines joined with `\n`.
///
/// # Examples
///
/// ```
/// use coding_tools::block::edit_blocks;
///
/// let block = vec!["b".to_string(), "c".to_string()];
/// let repl = vec!["X".to_string()];
/// let (out, n, sites) = edit_blocks("f", "a\nb\nc\nd\n", &block, &repl);
/// assert_eq!(out, "a\nX\nd\n");
/// assert_eq!(n, 1);
/// assert_eq!(sites[0].line, 2);
///
/// // Empty replacement deletes the block's lines.
/// let (out, _, _) = edit_blocks("f", "a\nb\nc\nd\n", &block, &[]);
/// assert_eq!(out, "a\nd\n");
/// ```
pub fn edit_blocks(
    path: &str,
    content: &str,
    block: &[String],
    replacement: &[String],
) -> (String, usize, Vec<Site>) {
    // The exact matcher is the general editor with squeezing switched off.
    let squeeze = false;
    edit_blocks_with(path, content, block, replacement, squeeze)
}

/// The general form of [`edit_blocks`]: when `squeeze` is set, blocks are
/// matched with blank runs squeezed (see [`find_spans_squeezed`]) instead of
/// exactly. A squeezed match can cover more source lines than the block has,
/// so each [`Site::before`] records the source lines *actually* replaced; with
/// exact matching those are identical to the block.
///
/// Returns `(new content, occurrence count, changed sites)`. When nothing
/// matches, `content` is returned unchanged with a count of zero and no sites.
pub fn edit_blocks_with(
    path: &str,
    content: &str,
    block: &[String],
    replacement: &[String],
    squeeze: bool,
) -> (String, usize, Vec<Site>) {
    /// Separate one `split_inclusive('\n')` piece into (body, line ending).
    /// The ending is `\r\n`, `\n`, a lone trailing `\r`, or empty at EOF; the
    /// longest candidate is tried first so a CRLF is never split in two.
    fn split_ending(seg: &str) -> (&str, &str) {
        for ending in ["\r\n", "\n", "\r"] {
            if let Some(body) = seg.strip_suffix(ending) {
                return (body, ending);
            }
        }
        (seg, "")
    }

    // Keeping every line's own terminator lets untouched bytes round-trip, and
    // makes bodies carriage-return-free so a `\n`-split find block matches a
    // CRLF file without the file's endings being rewritten.
    let segments: Vec<(&str, &str)> = content.split_inclusive('\n').map(split_ending).collect();
    let bodies: Vec<&str> = segments.iter().map(|&(body, _)| body).collect();

    // Matches as (start, source-line count). An exact match always covers
    // `block.len()` lines; a squeezed one may cover more.
    let spans: Vec<(usize, usize)> = if squeeze {
        find_spans_squeezed(&bodies, block)
    } else {
        let width = block.len();
        find_starts(&bodies, block)
            .into_iter()
            .map(|start| (start, width))
            .collect()
    };
    if spans.is_empty() {
        return (content.to_string(), 0, Vec::new());
    }

    // Newline used between replacement lines when the matched span has no
    // terminator of its own (a block on the unterminated last line).
    let default_nl = if content.contains("\r\n") {
        "\r\n"
    } else {
        "\n"
    };

    let mut out = String::with_capacity(content.len());
    let mut sites = Vec::with_capacity(spans.len());
    let mut cursor = 0usize; // first segment not yet written to `out`
    for &(start, span) in &spans {
        // Untouched lines ahead of this match are copied through verbatim.
        for &(body, term) in &segments[cursor..start] {
            out.push_str(body);
            out.push_str(term);
        }
        let matched = &segments[start..start + span];
        // The final replacement line takes over the span's last terminator, so
        // a block ending at EOF-without-newline stays unterminated. Interior
        // lines use the span's own newline style, so a CRLF block is replaced
        // with CRLF rather than mixed endings.
        let last_term = matched[span - 1].1;
        let nl = matched
            .iter()
            .map(|&(_, term)| term)
            .find(|term| !term.is_empty())
            .unwrap_or(default_nl);
        if let Some((tail, head)) = replacement.split_last() {
            for text in head {
                out.push_str(text);
                out.push_str(nl);
            }
            out.push_str(tail);
            out.push_str(last_term);
        }
        let before = matched
            .iter()
            .map(|&(body, _)| body)
            .collect::<Vec<_>>()
            .join("\n");
        sites.push(Site {
            path: path.to_string(),
            line: start + 1,
            before,
            after: replacement.join("\n"),
        });
        cursor = start + span;
    }
    // Everything after the last match is copied through verbatim as well.
    for &(body, term) in &segments[cursor..] {
        out.push_str(body);
        out.push_str(term);
    }

    (out, spans.len(), sites)
}

// Unit tests for the block matchers, the miss diagnostics, and the block
// editor. Each test pins one observable behaviour with literal inputs.
#[cfg(test)]
mod tests {
    use super::*;

    // Build an owned find/replacement block from string literals.
    fn block(lines: &[&str]) -> Vec<String> {
        lines.iter().map(|s| s.to_string()).collect()
    }

    // Exact matching: a match consumes its lines (so "a","a","a" holds only one
    // "a","a" block), and leading whitespace prevents a match.
    #[test]
    fn matches_are_byte_exact_and_non_overlapping() {
        let lines = ["a", "a", "a"];
        assert_eq!(find_starts(&lines, &block(&["a", "a"])), vec![0]);
        // Whitespace is significant.
        assert!(find_starts(&["  x"], &block(&["x"])).is_empty());
    }

    // The diagnostic names the first block line that differs and the source
    // line found in its place.
    #[test]
    fn nearest_miss_reports_first_divergence() {
        let lines = ["fn a() {", "    one();", "    two();", "}"];
        let b = block(&["fn a() {", "    one();", "    three();"]);
        let m = nearest_miss(&lines, &b).unwrap();
        assert_eq!(m.line, 1);
        assert_eq!(m.first_diverging_line, 3);
        assert_eq!(m.expected, "    three();");
        assert_eq!(m.found, "    two();");
    }

    // With no exact first-line anchor, the trim-equal fallback still reports
    // the line whose indentation differs (tab vs. spaces here).
    #[test]
    fn nearest_miss_diagnoses_whitespace_drift_on_the_anchor_line() {
        let lines = ["\tindented();"];
        let b = block(&["    indented();"]);
        let m = nearest_miss(&lines, &b).unwrap();
        assert_eq!(m.line, 1);
        assert_eq!(m.first_diverging_line, 1);
        assert_eq!(m.found, "\tindented();");
    }

    // A block that runs past the end of the source diverges on a line with an
    // empty `found`.
    #[test]
    fn nearest_miss_past_eof_reports_empty_found() {
        let lines = ["a"];
        let b = block(&["a", "b"]);
        let m = nearest_miss(&lines, &b).unwrap();
        assert_eq!((m.line, m.first_diverging_line), (1, 2));
        assert_eq!(m.found, "");
        assert_eq!(m.block_len, 2);
    }

    // `block_len` is carried through, and the blank-line hint fires only when
    // the expected line at the divergence is empty.
    #[test]
    fn nearest_miss_carries_block_len_and_blank_line_hint() {
        // A phantom empty line 3 in the find block diverging against real source
        // is exactly the trailing-newline failure mode — the hint should fire.
        let lines = ["a", "fn x(", "    body,"];
        let b = block(&["a", "fn x(", ""]);
        let m = nearest_miss(&lines, &b).unwrap();
        assert_eq!(m.first_diverging_line, 3);
        assert_eq!(m.block_len, 3);
        assert_eq!(m.expected, "");
        let hint = m
            .blank_line_hint()
            .expect("empty expected line yields a hint");
        assert!(hint.contains("line 3 (of 3)"), "{hint}");

        // A non-empty divergence is an ordinary mismatch: no blank-line hint.
        let b2 = block(&["a", "fn y("]);
        let m2 = nearest_miss(&lines, &b2).unwrap();
        assert!(m2.blank_line_hint().is_none());
    }

    // Replacing the unterminated last line keeps the file unterminated.
    #[test]
    fn block_edit_preserves_missing_final_newline() {
        let b = block(&["x"]);
        let (out, n, _) = edit_blocks("f", "a\nx", &b, &block(&["y", "z"]));
        assert_eq!(out, "a\ny\nz");
        assert_eq!(n, 1);
    }

    // A `\n`-split block matches CRLF content, and both deletion and
    // replacement leave the file's CRLF endings intact.
    #[test]
    fn block_edit_matches_crlf_and_preserves_endings() {
        // The reported repro: a 3-line block followed by a blank line, all CRLF.
        // A `\n`-split find block must match, and the file's CRLF must survive.
        let content = "struct Foo {\r\n    a: u32,\r\n}\r\n\r\nfn keep() {}\r\n";
        let find = block(&["struct Foo {", "    a: u32,", "}"]);

        // Delete: block removed, surrounding CRLF (incl. the blank line) intact.
        let (out, n, sites) = edit_blocks("f", content, &find, &[]);
        assert_eq!(n, 1);
        assert_eq!(out, "\r\nfn keep() {}\r\n");
        // The site's `before` is the clean, carriage-return-free body.
        assert_eq!(sites[0].before, "struct Foo {\n    a: u32,\n}");

        // Replace: the new lines take CRLF too — no mixed endings.
        let repl = block(&["struct Bar {", "    b: u64,", "}"]);
        let (out2, n2, _) = edit_blocks("f", content, &find, &repl);
        assert_eq!(n2, 1);
        assert_eq!(
            out2,
            "struct Bar {\r\n    b: u64,\r\n}\r\n\r\nfn keep() {}\r\n"
        );
    }

    // The matched span has no terminator of its own here, so the interior
    // newline comes from the file's dominant (CRLF) style.
    #[test]
    fn block_edit_preserves_crlf_missing_final_newline() {
        // A CRLF file whose last line has no trailing newline: the last
        // replacement line stays unterminated, and the interior line uses the
        // file's CRLF (not a lone LF).
        let (out, n, _) = edit_blocks("f", "a\r\nx", &block(&["x"]), &block(&["y", "z"]));
        assert_eq!(n, 1);
        assert_eq!(out, "a\r\ny\r\nz");
    }

    // Every occurrence is replaced, and each site reports its own 1-based line.
    #[test]
    fn block_edit_replaces_multiple_sites() {
        let b = block(&["x"]);
        let (out, n, sites) = edit_blocks("f", "x\nm\nx\n", &b, &block(&["y"]));
        assert_eq!(out, "y\nm\ny\n");
        assert_eq!(n, 2);
        assert_eq!(sites.iter().map(|s| s.line).collect::<Vec<_>>(), vec![1, 3]);
    }

    // Squeezing pairs blank runs of differing lengths in either direction and
    // counts whitespace-only lines as blank.
    #[test]
    fn squeeze_matches_blank_runs_of_any_length() {
        // Anchor with one blank line; source has two — squeezing aligns them.
        let lines = ["foo()", "", "", "bar()"];
        let b = block(&["foo()", "", "bar()"]);
        // Exact matching misses (1 blank != 2 blanks).
        assert!(find_starts(&lines, &b).is_empty());
        // Squeezed matching spans all four source lines from index 0.
        assert_eq!(find_spans_squeezed(&lines, &b), vec![(0, 4)]);
        // The reverse also holds: a 2-blank anchor matches a 1-blank source.
        let lines2 = ["foo()", "", "bar()"];
        let b2 = block(&["foo()", "", "", "bar()"]);
        assert_eq!(find_spans_squeezed(&lines2, &b2), vec![(0, 3)]);
        // Whitespace-only lines count as blank.
        let lines3 = ["a", "   ", "\t", "b"];
        let b3 = block(&["a", "", "b"]);
        assert_eq!(find_spans_squeezed(&lines3, &b3), vec![(0, 4)]);
    }

    // Squeezing is not fuzzy matching: a blank run cannot match zero source
    // blanks, and non-blank lines stay case- and byte-exact.
    #[test]
    fn squeeze_still_requires_at_least_one_blank_and_exact_nonblank() {
        // A blank run in the anchor needs a blank in the source: none here.
        let lines = ["a", "b"];
        let b = block(&["a", "", "b"]);
        assert!(find_spans_squeezed(&lines, &b).is_empty());
        // Non-blank lines are still byte-exact.
        let lines2 = ["a", "", "B"];
        let b2 = block(&["a", "", "b"]);
        assert!(find_spans_squeezed(&lines2, &b2).is_empty());
    }

    // A squeezed edit replaces the whole matched source span and reports that
    // span, not the anchor, as the site's `before`.
    #[test]
    fn squeeze_edit_replaces_the_full_source_span() {
        // Two source blanks collapse into whatever the replacement specifies;
        // the matched span (4 lines) is what gets replaced, and the site's
        // `before` reflects the real source, not the anchor.
        let b = block(&["foo()", "", "bar()"]);
        let repl = block(&["foo()", "", "bar()"]);
        let (out, n, sites) = edit_blocks_with("f", "foo()\n\n\nbar()\nrest\n", &b, &repl, true);
        assert_eq!(n, 1);
        assert_eq!(out, "foo()\n\nbar()\nrest\n");
        assert_eq!(sites[0].before, "foo()\n\n\nbar()");
    }

    // Under squeeze the diagnostic skips over the aligned blank run and reports
    // the non-blank line that actually differs.
    #[test]
    fn squeeze_nearest_miss_diverges_on_the_nonblank_line() {
        // foo() and the blank run align; `baz()` diverges from `bar()`.
        let lines = ["foo()", "", "", "bar()"];
        let b = block(&["foo()", "", "baz()"]);
        let m = nearest_miss_with(&lines, &b, true).unwrap();
        assert_eq!(m.first_diverging_line, 3);
        assert_eq!(m.expected, "baz()");
        assert_eq!(m.found, "bar()");
        assert_eq!(m.line, 1);
    }
}