Skip to main content

coding_tools/
block.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 Jonathan Shook
3
4//! Line-anchored literal block matching, shared by `ct-search`, `ct-view`,
5//! and `ct-edit`.
6//!
7//! A multi-line pattern matches as a *block*: a find block of K lines matches
8//! K consecutive source lines exactly, byte-for-byte, leading and trailing
9//! whitespace significant. Only the line *ending* is not significant — a
10//! `\n`-split find block matches CRLF source, and [`edit_blocks`] preserves each
11//! file's own endings (and a missing final newline) on write. When a block fails
12//! to match, [`nearest_miss`]
13//! reports the best partial alignment — the candidate with the longest
14//! matching prefix and the first diverging line — so the author sees *why*
15//! the anchor missed (whitespace drift, a comment edit, an already-applied
16//! change) without bisecting by hand.
17
18/// The best partial alignment of a block that did not match.
19#[derive(Debug, Clone, PartialEq, Eq)]
20pub struct NearestMiss {
21    /// 1-based source line where the best candidate alignment starts.
22    pub line: usize,
23    /// 1-based index *into the block* of the first diverging line.
24    pub first_diverging_line: usize,
25    /// The block line that was expected at the divergence.
26    pub expected: String,
27    /// The source line actually found there (empty past end of file).
28    pub found: String,
29    /// Total number of lines in the find block (for self-diagnosing output).
30    pub block_len: usize,
31}
32
33impl NearestMiss {
34    /// A diagnostic note when the divergence is likely a *stray blank line* in
35    /// the find payload — the expected block line is empty, which an editor's
36    /// trailing newline (or a hand-pasted blank line) commonly produces — else
37    /// `None`. Block anchors taken from `file:` payloads have their trailing
38    /// blank lines trimmed, so this remains useful mainly for inline/`text:`
39    /// payloads and interior empty lines.
40    pub fn blank_line_hint(&self) -> Option<String> {
41        self.expected.is_empty().then(|| {
42            format!(
43                "the find block's line {} (of {}) is empty — likely a stray blank or \
44                 trailing line in the payload; trim it, or pass the anchor via text:",
45                self.first_diverging_line, self.block_len
46            )
47        })
48    }
49}
50
51/// Find every non-overlapping occurrence of `block` in `lines`, scanning
52/// forward. Returns the 0-based start indices.
53///
54/// # Examples
55///
56/// ```
57/// use coding_tools::block::find_starts;
58///
59/// let lines = ["a", "b", "c", "a", "b"];
60/// let block = ["a".to_string(), "b".to_string()];
61/// assert_eq!(find_starts(&lines, &block), vec![0, 3]);
62/// ```
63pub fn find_starts<S: AsRef<str>>(lines: &[S], block: &[String]) -> Vec<usize> {
64    let k = block.len();
65    if k == 0 || lines.len() < k {
66        return Vec::new();
67    }
68    let mut starts = Vec::new();
69    let mut i = 0usize;
70    while i + k <= lines.len() {
71        if block
72            .iter()
73            .zip(&lines[i..i + k])
74            .all(|(b, l)| b == l.as_ref())
75        {
76            starts.push(i);
77            i += k; // non-overlapping: continue past the match
78        } else {
79            i += 1;
80        }
81    }
82    starts
83}
84
85/// Whether a line counts as *blank* for blank-run squeezing: empty or
86/// whitespace-only. Squeezing deliberately treats a `"   "` line as blank.
87fn is_blank(s: &str) -> bool {
88    s.trim().is_empty()
89}
90
91/// Align `block` against `lines` from source index `start`, *squeezing blank
92/// runs*: a maximal run of blank lines in `block` matches a run of one or more
93/// blank lines in the source, and non-blank block lines must match the source
94/// byte-for-byte. On success returns the number of source lines consumed; on
95/// failure returns `(block index that first diverged, source index reached)`.
96fn align_squeezed<S: AsRef<str>>(
97    lines: &[S],
98    block: &[String],
99    start: usize,
100) -> Result<usize, (usize, usize)> {
101    let mut bi = 0usize;
102    let mut li = start;
103    while bi < block.len() {
104        if is_blank(&block[bi]) {
105            let run_start = bi;
106            while bi < block.len() && is_blank(&block[bi]) {
107                bi += 1;
108            }
109            // A blank run in the block requires at least one source blank line.
110            if li >= lines.len() || !is_blank(lines[li].as_ref()) {
111                return Err((run_start, li));
112            }
113            while li < lines.len() && is_blank(lines[li].as_ref()) {
114                li += 1;
115            }
116        } else {
117            if li >= lines.len() || lines[li].as_ref() != block[bi] {
118                return Err((bi, li));
119            }
120            bi += 1;
121            li += 1;
122        }
123    }
124    Ok(li - start)
125}
126
127/// Find every non-overlapping *squeezed* match of `block` in `lines`, scanning
128/// forward (see [`align_squeezed`]). Returns each match's `(0-based start,
129/// source-line count)` span — the span can be longer than the block when the
130/// source has wider blank runs than the anchor.
131///
132/// # Examples
133///
134/// ```
135/// use coding_tools::block::find_spans_squeezed;
136///
137/// // The anchor's single blank line absorbs the source's two blank lines.
138/// let lines = ["foo()", "", "", "bar()"];
139/// let block = ["foo()".to_string(), String::new(), "bar()".to_string()];
140/// assert_eq!(find_spans_squeezed(&lines, &block), vec![(0, 4)]);
141/// ```
142pub fn find_spans_squeezed<S: AsRef<str>>(lines: &[S], block: &[String]) -> Vec<(usize, usize)> {
143    if block.is_empty() {
144        return Vec::new();
145    }
146    let mut spans = Vec::new();
147    let mut i = 0usize;
148    while i < lines.len() {
149        if let Ok(len) = align_squeezed(lines, block, i) {
150            spans.push((i, len));
151            i += len.max(1); // non-overlapping
152        } else {
153            i += 1;
154        }
155    }
156    spans
157}
158
159/// Report the best partial alignment of an unmatched `block` against `lines`:
160/// the start with the longest run of matching leading block lines (ties go to
161/// the earliest). When no line equals the block's first line at all, falls
162/// back to a whitespace-insensitive scan of that first line, so indentation
163/// drift — the most common anchor failure — is still diagnosed.
164pub fn nearest_miss<S: AsRef<str>>(lines: &[S], block: &[String]) -> Option<NearestMiss> {
165    if block.is_empty() || lines.is_empty() {
166        return None;
167    }
168    let mut best: Option<(usize, usize)> = None; // (matched_prefix_len, start)
169    for start in 0..lines.len() {
170        if lines[start].as_ref() != block[0] {
171            continue;
172        }
173        let mut len = 0usize;
174        while len < block.len()
175            && start + len < lines.len()
176            && lines[start + len].as_ref() == block[len]
177        {
178            len += 1;
179        }
180        if best.is_none_or(|(blen, _)| len > blen) {
181            best = Some((len, start));
182        }
183    }
184    if let Some((len, start)) = best {
185        // len == block.len() would have been a match; here it is a prefix.
186        let found = lines
187            .get(start + len)
188            .map(|l| l.as_ref().to_string())
189            .unwrap_or_default();
190        return Some(NearestMiss {
191            line: start + 1,
192            first_diverging_line: len + 1,
193            expected: block.get(len).cloned().unwrap_or_default(),
194            found,
195            block_len: block.len(),
196        });
197    }
198    // No exact first-line anchor anywhere: diagnose whitespace drift on the
199    // first line if a trim-equal candidate exists.
200    let want = block[0].trim();
201    if want.is_empty() {
202        return None;
203    }
204    lines
205        .iter()
206        .position(|l| l.as_ref().trim() == want)
207        .map(|i| NearestMiss {
208            line: i + 1,
209            first_diverging_line: 1,
210            expected: block[0].clone(),
211            found: lines[i].as_ref().to_string(),
212            block_len: block.len(),
213        })
214}
215
216/// [`nearest_miss`], selecting the exact or blank-run-squeezing matcher by
217/// `squeeze`. Used so the diagnostic agrees with how the edit actually matched.
218pub fn nearest_miss_with<S: AsRef<str>>(
219    lines: &[S],
220    block: &[String],
221    squeeze: bool,
222) -> Option<NearestMiss> {
223    if squeeze {
224        nearest_miss_squeezed(lines, block)
225    } else {
226        nearest_miss(lines, block)
227    }
228}
229
230/// The squeeze-aware partial alignment: the anchorable start that consumed the
231/// longest run of leading block lines before diverging (ties go to the
232/// earliest), with blank runs squeezed exactly as [`find_spans_squeezed`] does.
233/// Falls back to the same whitespace-trim scan of the first line as the exact
234/// matcher when no start anchors.
235fn nearest_miss_squeezed<S: AsRef<str>>(lines: &[S], block: &[String]) -> Option<NearestMiss> {
236    if block.is_empty() || lines.is_empty() {
237        return None;
238    }
239    let first_anchors = |src: &str| {
240        if is_blank(&block[0]) {
241            is_blank(src)
242        } else {
243            src == block[0]
244        }
245    };
246    // best = (block lines consumed before divergence, start, source index there)
247    let mut best: Option<(usize, usize, usize)> = None;
248    for start in 0..lines.len() {
249        if !first_anchors(lines[start].as_ref()) {
250            continue;
251        }
252        if let Err((bi, li)) = align_squeezed(lines, block, start)
253            && best.is_none_or(|(blen, _, _)| bi > blen)
254        {
255            best = Some((bi, start, li));
256        }
257    }
258    if let Some((bi, start, li)) = best {
259        let found = lines
260            .get(li)
261            .map(|l| l.as_ref().to_string())
262            .unwrap_or_default();
263        return Some(NearestMiss {
264            line: start + 1,
265            first_diverging_line: bi + 1,
266            expected: block.get(bi).cloned().unwrap_or_default(),
267            found,
268            block_len: block.len(),
269        });
270    }
271    let want = block[0].trim();
272    if want.is_empty() {
273        return None;
274    }
275    lines
276        .iter()
277        .position(|l| l.as_ref().trim() == want)
278        .map(|i| NearestMiss {
279            line: i + 1,
280            first_diverging_line: 1,
281            expected: block[0].clone(),
282            found: lines[i].as_ref().to_string(),
283            block_len: block.len(),
284        })
285}
286
287use crate::edit::Site;
288
289/// Replace every non-overlapping occurrence of `block` in `content` with
290/// `replacement` lines, preserving every untouched byte (including a missing
291/// final newline). An empty `replacement` deletes the matched lines entirely.
292/// Returns the new content, the occurrence count, and the changed sites
293/// (`line` is the block's 1-based start; `before`/`after` are newline-joined).
294///
295/// # Examples
296///
297/// ```
298/// use coding_tools::block::edit_blocks;
299///
300/// let block = vec!["b".to_string(), "c".to_string()];
301/// let repl = vec!["X".to_string()];
302/// let (out, n, sites) = edit_blocks("f", "a\nb\nc\nd\n", &block, &repl);
303/// assert_eq!(out, "a\nX\nd\n");
304/// assert_eq!(n, 1);
305/// assert_eq!(sites[0].line, 2);
306///
307/// // Empty replacement deletes the block's lines.
308/// let (out, _, _) = edit_blocks("f", "a\nb\nc\nd\n", &block, &[]);
309/// assert_eq!(out, "a\nd\n");
310/// ```
311pub fn edit_blocks(
312    path: &str,
313    content: &str,
314    block: &[String],
315    replacement: &[String],
316) -> (String, usize, Vec<Site>) {
317    edit_blocks_with(path, content, block, replacement, false)
318}
319
320/// [`edit_blocks`], with optional blank-run `squeeze`ing of the match (see
321/// [`find_spans_squeezed`]). Under squeeze the replaced source span can be
322/// longer than the block, so each [`Site::before`] carries the *actual* matched
323/// source lines (identical to the block in the exact path).
324pub fn edit_blocks_with(
325    path: &str,
326    content: &str,
327    block: &[String],
328    replacement: &[String],
329    squeeze: bool,
330) -> (String, usize, Vec<Site>) {
331    // Split into (body, terminator) per line so untouched bytes round-trip. The
332    // terminator captures the whole line ending (`\r\n`, `\n`, a lone trailing
333    // `\r`, or none at EOF), so bodies are carriage-return-free and match a
334    // `\n`-split find block on a CRLF file without rewriting the file's endings.
335    let segments: Vec<(&str, &str)> = content
336        .split_inclusive('\n')
337        .map(|seg| {
338            if let Some(b) = seg.strip_suffix("\r\n") {
339                (b, "\r\n")
340            } else if let Some(b) = seg.strip_suffix('\n') {
341                (b, "\n")
342            } else if let Some(b) = seg.strip_suffix('\r') {
343                (b, "\r")
344            } else {
345                (seg, "")
346            }
347        })
348        .collect();
349    let bodies: Vec<&str> = segments.iter().map(|(b, _)| *b).collect();
350    // The file's dominant newline, used for replacement lines when the matched
351    // span itself carries none (a block at EOF with no trailing newline).
352    let default_nl = if content.contains("\r\n") {
353        "\r\n"
354    } else {
355        "\n"
356    };
357    // Each match is a (start, source-line count) span. Exact matching always
358    // spans exactly `block.len()` lines; squeezing can span more.
359    let spans: Vec<(usize, usize)> = if squeeze {
360        find_spans_squeezed(&bodies, block)
361    } else {
362        find_starts(&bodies, block)
363            .into_iter()
364            .map(|s| (s, block.len()))
365            .collect()
366    };
367    if spans.is_empty() {
368        return (content.to_string(), 0, Vec::new());
369    }
370
371    let mut out = String::with_capacity(content.len());
372    let mut sites = Vec::new();
373    let mut next = spans.iter().peekable();
374    let mut i = 0usize;
375    while i < segments.len() {
376        if next.peek().is_some_and(|(s, _)| *s == i) {
377            let (_, span) = *next.next().unwrap();
378            // Preserve the matched span's line endings in the replacement: the
379            // last replacement line inherits the block's last terminator (so a
380            // block ending at EOF-without-newline stays unterminated), and
381            // interior lines take the span's newline style (so a CRLF block is
382            // replaced with CRLF, not mixed endings).
383            let last_term = segments[i + span - 1].1;
384            let nl = segments[i..i + span]
385                .iter()
386                .map(|(_, t)| *t)
387                .find(|t| !t.is_empty())
388                .unwrap_or(default_nl);
389            for (r, rl) in replacement.iter().enumerate() {
390                out.push_str(rl);
391                out.push_str(if r + 1 == replacement.len() {
392                    last_term
393                } else {
394                    nl
395                });
396            }
397            let before = segments[i..i + span]
398                .iter()
399                .map(|(b, _)| *b)
400                .collect::<Vec<_>>()
401                .join("\n");
402            sites.push(Site {
403                path: path.to_string(),
404                line: i + 1,
405                before,
406                after: replacement.join("\n"),
407            });
408            i += span;
409        } else {
410            out.push_str(segments[i].0);
411            out.push_str(segments[i].1);
412            i += 1;
413        }
414    }
415
416    (out, spans.len(), sites)
417}
418
419#[cfg(test)]
420mod tests {
421    use super::*;
422
423    fn block(lines: &[&str]) -> Vec<String> {
424        lines.iter().map(|s| s.to_string()).collect()
425    }
426
427    #[test]
428    fn matches_are_byte_exact_and_non_overlapping() {
429        let lines = ["a", "a", "a"];
430        assert_eq!(find_starts(&lines, &block(&["a", "a"])), vec![0]);
431        // Whitespace is significant.
432        assert!(find_starts(&["  x"], &block(&["x"])).is_empty());
433    }
434
435    #[test]
436    fn nearest_miss_reports_first_divergence() {
437        let lines = ["fn a() {", "    one();", "    two();", "}"];
438        let b = block(&["fn a() {", "    one();", "    three();"]);
439        let m = nearest_miss(&lines, &b).unwrap();
440        assert_eq!(m.line, 1);
441        assert_eq!(m.first_diverging_line, 3);
442        assert_eq!(m.expected, "    three();");
443        assert_eq!(m.found, "    two();");
444    }
445
446    #[test]
447    fn nearest_miss_diagnoses_whitespace_drift_on_the_anchor_line() {
448        let lines = ["\tindented();"];
449        let b = block(&["    indented();"]);
450        let m = nearest_miss(&lines, &b).unwrap();
451        assert_eq!(m.line, 1);
452        assert_eq!(m.first_diverging_line, 1);
453        assert_eq!(m.found, "\tindented();");
454    }
455
456    #[test]
457    fn nearest_miss_past_eof_reports_empty_found() {
458        let lines = ["a"];
459        let b = block(&["a", "b"]);
460        let m = nearest_miss(&lines, &b).unwrap();
461        assert_eq!((m.line, m.first_diverging_line), (1, 2));
462        assert_eq!(m.found, "");
463        assert_eq!(m.block_len, 2);
464    }
465
466    #[test]
467    fn nearest_miss_carries_block_len_and_blank_line_hint() {
468        // A phantom empty line 3 in the find block diverging against real source
469        // is exactly the trailing-newline failure mode — the hint should fire.
470        let lines = ["a", "fn x(", "    body,"];
471        let b = block(&["a", "fn x(", ""]);
472        let m = nearest_miss(&lines, &b).unwrap();
473        assert_eq!(m.first_diverging_line, 3);
474        assert_eq!(m.block_len, 3);
475        assert_eq!(m.expected, "");
476        let hint = m
477            .blank_line_hint()
478            .expect("empty expected line yields a hint");
479        assert!(hint.contains("line 3 (of 3)"), "{hint}");
480
481        // A non-empty divergence is an ordinary mismatch: no blank-line hint.
482        let b2 = block(&["a", "fn y("]);
483        let m2 = nearest_miss(&lines, &b2).unwrap();
484        assert!(m2.blank_line_hint().is_none());
485    }
486
487    #[test]
488    fn block_edit_preserves_missing_final_newline() {
489        let b = block(&["x"]);
490        let (out, n, _) = edit_blocks("f", "a\nx", &b, &block(&["y", "z"]));
491        assert_eq!(out, "a\ny\nz");
492        assert_eq!(n, 1);
493    }
494
495    #[test]
496    fn block_edit_matches_crlf_and_preserves_endings() {
497        // The reported repro: a 3-line block followed by a blank line, all CRLF.
498        // A `\n`-split find block must match, and the file's CRLF must survive.
499        let content = "struct Foo {\r\n    a: u32,\r\n}\r\n\r\nfn keep() {}\r\n";
500        let find = block(&["struct Foo {", "    a: u32,", "}"]);
501
502        // Delete: block removed, surrounding CRLF (incl. the blank line) intact.
503        let (out, n, sites) = edit_blocks("f", content, &find, &[]);
504        assert_eq!(n, 1);
505        assert_eq!(out, "\r\nfn keep() {}\r\n");
506        // The site's `before` is the clean, carriage-return-free body.
507        assert_eq!(sites[0].before, "struct Foo {\n    a: u32,\n}");
508
509        // Replace: the new lines take CRLF too — no mixed endings.
510        let repl = block(&["struct Bar {", "    b: u64,", "}"]);
511        let (out2, n2, _) = edit_blocks("f", content, &find, &repl);
512        assert_eq!(n2, 1);
513        assert_eq!(
514            out2,
515            "struct Bar {\r\n    b: u64,\r\n}\r\n\r\nfn keep() {}\r\n"
516        );
517    }
518
519    #[test]
520    fn block_edit_preserves_crlf_missing_final_newline() {
521        // A CRLF file whose last line has no trailing newline: the last
522        // replacement line stays unterminated, and the interior line uses the
523        // file's CRLF (not a lone LF).
524        let (out, n, _) = edit_blocks("f", "a\r\nx", &block(&["x"]), &block(&["y", "z"]));
525        assert_eq!(n, 1);
526        assert_eq!(out, "a\r\ny\r\nz");
527    }
528
529    #[test]
530    fn block_edit_replaces_multiple_sites() {
531        let b = block(&["x"]);
532        let (out, n, sites) = edit_blocks("f", "x\nm\nx\n", &b, &block(&["y"]));
533        assert_eq!(out, "y\nm\ny\n");
534        assert_eq!(n, 2);
535        assert_eq!(sites.iter().map(|s| s.line).collect::<Vec<_>>(), vec![1, 3]);
536    }
537
538    #[test]
539    fn squeeze_matches_blank_runs_of_any_length() {
540        // Anchor with one blank line; source has two — squeezing aligns them.
541        let lines = ["foo()", "", "", "bar()"];
542        let b = block(&["foo()", "", "bar()"]);
543        // Exact matching misses (1 blank != 2 blanks).
544        assert!(find_starts(&lines, &b).is_empty());
545        // Squeezed matching spans all four source lines from index 0.
546        assert_eq!(find_spans_squeezed(&lines, &b), vec![(0, 4)]);
547        // The reverse also holds: a 2-blank anchor matches a 1-blank source.
548        let lines2 = ["foo()", "", "bar()"];
549        let b2 = block(&["foo()", "", "", "bar()"]);
550        assert_eq!(find_spans_squeezed(&lines2, &b2), vec![(0, 3)]);
551        // Whitespace-only lines count as blank.
552        let lines3 = ["a", "   ", "\t", "b"];
553        let b3 = block(&["a", "", "b"]);
554        assert_eq!(find_spans_squeezed(&lines3, &b3), vec![(0, 4)]);
555    }
556
557    #[test]
558    fn squeeze_still_requires_at_least_one_blank_and_exact_nonblank() {
559        // A blank run in the anchor needs a blank in the source: none here.
560        let lines = ["a", "b"];
561        let b = block(&["a", "", "b"]);
562        assert!(find_spans_squeezed(&lines, &b).is_empty());
563        // Non-blank lines are still byte-exact.
564        let lines2 = ["a", "", "B"];
565        let b2 = block(&["a", "", "b"]);
566        assert!(find_spans_squeezed(&lines2, &b2).is_empty());
567    }
568
569    #[test]
570    fn squeeze_edit_replaces_the_full_source_span() {
571        // Two source blanks collapse into whatever the replacement specifies;
572        // the matched span (4 lines) is what gets replaced, and the site's
573        // `before` reflects the real source, not the anchor.
574        let b = block(&["foo()", "", "bar()"]);
575        let repl = block(&["foo()", "", "bar()"]);
576        let (out, n, sites) = edit_blocks_with("f", "foo()\n\n\nbar()\nrest\n", &b, &repl, true);
577        assert_eq!(n, 1);
578        assert_eq!(out, "foo()\n\nbar()\nrest\n");
579        assert_eq!(sites[0].before, "foo()\n\n\nbar()");
580    }
581
582    #[test]
583    fn squeeze_nearest_miss_diverges_on_the_nonblank_line() {
584        // foo() and the blank run align; `baz()` diverges from `bar()`.
585        let lines = ["foo()", "", "", "bar()"];
586        let b = block(&["foo()", "", "baz()"]);
587        let m = nearest_miss_with(&lines, &b, true).unwrap();
588        assert_eq!(m.first_diverging_line, 3);
589        assert_eq!(m.expected, "baz()");
590        assert_eq!(m.found, "bar()");
591        assert_eq!(m.line, 1);
592    }
593}