Skip to main content

coding_tools/
block.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 Jonathan Shook
3
4//! Line-anchored literal block matching, shared by `ct-search`, `ct-view`,
5//! and `ct-edit`.
6//!
7//! A multi-line pattern matches as a *block*: a find block of K lines matches
8//! K consecutive source lines exactly, byte-for-byte, leading and trailing
9//! whitespace significant. When a block fails to match, [`nearest_miss`]
10//! reports the best partial alignment — the candidate with the longest
11//! matching prefix and the first diverging line — so the author sees *why*
12//! the anchor missed (whitespace drift, a comment edit, an already-applied
13//! change) without bisecting by hand.
14
15/// The best partial alignment of a block that did not match.
16#[derive(Debug, Clone, PartialEq, Eq)]
17pub struct NearestMiss {
18    /// 1-based source line where the best candidate alignment starts.
19    pub line: usize,
20    /// 1-based index *into the block* of the first diverging line.
21    pub first_diverging_line: usize,
22    /// The block line that was expected at the divergence.
23    pub expected: String,
24    /// The source line actually found there (empty past end of file).
25    pub found: String,
26    /// Total number of lines in the find block (for self-diagnosing output).
27    pub block_len: usize,
28}
29
30impl NearestMiss {
31    /// A diagnostic note when the divergence is likely a *stray blank line* in
32    /// the find payload — the expected block line is empty, which an editor's
33    /// trailing newline (or a hand-pasted blank line) commonly produces — else
34    /// `None`. Block anchors taken from `file:` payloads have their trailing
35    /// blank lines trimmed, so this remains useful mainly for inline/`text:`
36    /// payloads and interior empty lines.
37    pub fn blank_line_hint(&self) -> Option<String> {
38        self.expected.is_empty().then(|| {
39            format!(
40                "the find block's line {} (of {}) is empty — likely a stray blank or \
41                 trailing line in the payload; trim it, or pass the anchor via text:",
42                self.first_diverging_line, self.block_len
43            )
44        })
45    }
46}
47
48/// Find every non-overlapping occurrence of `block` in `lines`, scanning
49/// forward. Returns the 0-based start indices.
50///
51/// # Examples
52///
53/// ```
54/// use coding_tools::block::find_starts;
55///
56/// let lines = ["a", "b", "c", "a", "b"];
57/// let block = ["a".to_string(), "b".to_string()];
58/// assert_eq!(find_starts(&lines, &block), vec![0, 3]);
59/// ```
60pub fn find_starts<S: AsRef<str>>(lines: &[S], block: &[String]) -> Vec<usize> {
61    let k = block.len();
62    if k == 0 || lines.len() < k {
63        return Vec::new();
64    }
65    let mut starts = Vec::new();
66    let mut i = 0usize;
67    while i + k <= lines.len() {
68        if block
69            .iter()
70            .zip(&lines[i..i + k])
71            .all(|(b, l)| b == l.as_ref())
72        {
73            starts.push(i);
74            i += k; // non-overlapping: continue past the match
75        } else {
76            i += 1;
77        }
78    }
79    starts
80}
81
82/// Whether a line counts as *blank* for blank-run squeezing: empty or
83/// whitespace-only. Squeezing deliberately treats a `"   "` line as blank.
84fn is_blank(s: &str) -> bool {
85    s.trim().is_empty()
86}
87
88/// Align `block` against `lines` from source index `start`, *squeezing blank
89/// runs*: a maximal run of blank lines in `block` matches a run of one or more
90/// blank lines in the source, and non-blank block lines must match the source
91/// byte-for-byte. On success returns the number of source lines consumed; on
92/// failure returns `(block index that first diverged, source index reached)`.
93fn align_squeezed<S: AsRef<str>>(
94    lines: &[S],
95    block: &[String],
96    start: usize,
97) -> Result<usize, (usize, usize)> {
98    let mut bi = 0usize;
99    let mut li = start;
100    while bi < block.len() {
101        if is_blank(&block[bi]) {
102            let run_start = bi;
103            while bi < block.len() && is_blank(&block[bi]) {
104                bi += 1;
105            }
106            // A blank run in the block requires at least one source blank line.
107            if li >= lines.len() || !is_blank(lines[li].as_ref()) {
108                return Err((run_start, li));
109            }
110            while li < lines.len() && is_blank(lines[li].as_ref()) {
111                li += 1;
112            }
113        } else {
114            if li >= lines.len() || lines[li].as_ref() != block[bi] {
115                return Err((bi, li));
116            }
117            bi += 1;
118            li += 1;
119        }
120    }
121    Ok(li - start)
122}
123
124/// Find every non-overlapping *squeezed* match of `block` in `lines`, scanning
125/// forward (see [`align_squeezed`]). Returns each match's `(0-based start,
126/// source-line count)` span — the span can be longer than the block when the
127/// source has wider blank runs than the anchor.
128///
129/// # Examples
130///
131/// ```
132/// use coding_tools::block::find_spans_squeezed;
133///
134/// // The anchor's single blank line absorbs the source's two blank lines.
135/// let lines = ["foo()", "", "", "bar()"];
136/// let block = ["foo()".to_string(), String::new(), "bar()".to_string()];
137/// assert_eq!(find_spans_squeezed(&lines, &block), vec![(0, 4)]);
138/// ```
139pub fn find_spans_squeezed<S: AsRef<str>>(lines: &[S], block: &[String]) -> Vec<(usize, usize)> {
140    if block.is_empty() {
141        return Vec::new();
142    }
143    let mut spans = Vec::new();
144    let mut i = 0usize;
145    while i < lines.len() {
146        if let Ok(len) = align_squeezed(lines, block, i) {
147            spans.push((i, len));
148            i += len.max(1); // non-overlapping
149        } else {
150            i += 1;
151        }
152    }
153    spans
154}
155
156/// Report the best partial alignment of an unmatched `block` against `lines`:
157/// the start with the longest run of matching leading block lines (ties go to
158/// the earliest). When no line equals the block's first line at all, falls
159/// back to a whitespace-insensitive scan of that first line, so indentation
160/// drift — the most common anchor failure — is still diagnosed.
161pub fn nearest_miss<S: AsRef<str>>(lines: &[S], block: &[String]) -> Option<NearestMiss> {
162    if block.is_empty() || lines.is_empty() {
163        return None;
164    }
165    let mut best: Option<(usize, usize)> = None; // (matched_prefix_len, start)
166    for start in 0..lines.len() {
167        if lines[start].as_ref() != block[0] {
168            continue;
169        }
170        let mut len = 0usize;
171        while len < block.len()
172            && start + len < lines.len()
173            && lines[start + len].as_ref() == block[len]
174        {
175            len += 1;
176        }
177        if best.is_none_or(|(blen, _)| len > blen) {
178            best = Some((len, start));
179        }
180    }
181    if let Some((len, start)) = best {
182        // len == block.len() would have been a match; here it is a prefix.
183        let found = lines
184            .get(start + len)
185            .map(|l| l.as_ref().to_string())
186            .unwrap_or_default();
187        return Some(NearestMiss {
188            line: start + 1,
189            first_diverging_line: len + 1,
190            expected: block.get(len).cloned().unwrap_or_default(),
191            found,
192            block_len: block.len(),
193        });
194    }
195    // No exact first-line anchor anywhere: diagnose whitespace drift on the
196    // first line if a trim-equal candidate exists.
197    let want = block[0].trim();
198    if want.is_empty() {
199        return None;
200    }
201    lines
202        .iter()
203        .position(|l| l.as_ref().trim() == want)
204        .map(|i| NearestMiss {
205            line: i + 1,
206            first_diverging_line: 1,
207            expected: block[0].clone(),
208            found: lines[i].as_ref().to_string(),
209            block_len: block.len(),
210        })
211}
212
213/// [`nearest_miss`], selecting the exact or blank-run-squeezing matcher by
214/// `squeeze`. Used so the diagnostic agrees with how the edit actually matched.
215pub fn nearest_miss_with<S: AsRef<str>>(
216    lines: &[S],
217    block: &[String],
218    squeeze: bool,
219) -> Option<NearestMiss> {
220    if squeeze {
221        nearest_miss_squeezed(lines, block)
222    } else {
223        nearest_miss(lines, block)
224    }
225}
226
227/// The squeeze-aware partial alignment: the anchorable start that consumed the
228/// longest run of leading block lines before diverging (ties go to the
229/// earliest), with blank runs squeezed exactly as [`find_spans_squeezed`] does.
230/// Falls back to the same whitespace-trim scan of the first line as the exact
231/// matcher when no start anchors.
232fn nearest_miss_squeezed<S: AsRef<str>>(lines: &[S], block: &[String]) -> Option<NearestMiss> {
233    if block.is_empty() || lines.is_empty() {
234        return None;
235    }
236    let first_anchors = |src: &str| {
237        if is_blank(&block[0]) {
238            is_blank(src)
239        } else {
240            src == block[0]
241        }
242    };
243    // best = (block lines consumed before divergence, start, source index there)
244    let mut best: Option<(usize, usize, usize)> = None;
245    for start in 0..lines.len() {
246        if !first_anchors(lines[start].as_ref()) {
247            continue;
248        }
249        if let Err((bi, li)) = align_squeezed(lines, block, start)
250            && best.is_none_or(|(blen, _, _)| bi > blen)
251        {
252            best = Some((bi, start, li));
253        }
254    }
255    if let Some((bi, start, li)) = best {
256        let found = lines
257            .get(li)
258            .map(|l| l.as_ref().to_string())
259            .unwrap_or_default();
260        return Some(NearestMiss {
261            line: start + 1,
262            first_diverging_line: bi + 1,
263            expected: block.get(bi).cloned().unwrap_or_default(),
264            found,
265            block_len: block.len(),
266        });
267    }
268    let want = block[0].trim();
269    if want.is_empty() {
270        return None;
271    }
272    lines
273        .iter()
274        .position(|l| l.as_ref().trim() == want)
275        .map(|i| NearestMiss {
276            line: i + 1,
277            first_diverging_line: 1,
278            expected: block[0].clone(),
279            found: lines[i].as_ref().to_string(),
280            block_len: block.len(),
281        })
282}
283
284use crate::edit::Site;
285
286/// Replace every non-overlapping occurrence of `block` in `content` with
287/// `replacement` lines, preserving every untouched byte (including a missing
288/// final newline). An empty `replacement` deletes the matched lines entirely.
289/// Returns the new content, the occurrence count, and the changed sites
290/// (`line` is the block's 1-based start; `before`/`after` are newline-joined).
291///
292/// # Examples
293///
294/// ```
295/// use coding_tools::block::edit_blocks;
296///
297/// let block = vec!["b".to_string(), "c".to_string()];
298/// let repl = vec!["X".to_string()];
299/// let (out, n, sites) = edit_blocks("f", "a\nb\nc\nd\n", &block, &repl);
300/// assert_eq!(out, "a\nX\nd\n");
301/// assert_eq!(n, 1);
302/// assert_eq!(sites[0].line, 2);
303///
304/// // Empty replacement deletes the block's lines.
305/// let (out, _, _) = edit_blocks("f", "a\nb\nc\nd\n", &block, &[]);
306/// assert_eq!(out, "a\nd\n");
307/// ```
308pub fn edit_blocks(
309    path: &str,
310    content: &str,
311    block: &[String],
312    replacement: &[String],
313) -> (String, usize, Vec<Site>) {
314    edit_blocks_with(path, content, block, replacement, false)
315}
316
317/// [`edit_blocks`], with optional blank-run `squeeze`ing of the match (see
318/// [`find_spans_squeezed`]). Under squeeze the replaced source span can be
319/// longer than the block, so each [`Site::before`] carries the *actual* matched
320/// source lines (identical to the block in the exact path).
321pub fn edit_blocks_with(
322    path: &str,
323    content: &str,
324    block: &[String],
325    replacement: &[String],
326    squeeze: bool,
327) -> (String, usize, Vec<Site>) {
328    // Split into (body, terminator) per line so untouched bytes round-trip.
329    let segments: Vec<(&str, &str)> = content
330        .split_inclusive('\n')
331        .map(|seg| match seg.strip_suffix('\n') {
332            Some(b) => (b, "\n"),
333            None => (seg, ""),
334        })
335        .collect();
336    let bodies: Vec<&str> = segments.iter().map(|(b, _)| *b).collect();
337    // Each match is a (start, source-line count) span. Exact matching always
338    // spans exactly `block.len()` lines; squeezing can span more.
339    let spans: Vec<(usize, usize)> = if squeeze {
340        find_spans_squeezed(&bodies, block)
341    } else {
342        find_starts(&bodies, block)
343            .into_iter()
344            .map(|s| (s, block.len()))
345            .collect()
346    };
347    if spans.is_empty() {
348        return (content.to_string(), 0, Vec::new());
349    }
350
351    let mut out = String::with_capacity(content.len());
352    let mut sites = Vec::new();
353    let mut next = spans.iter().peekable();
354    let mut i = 0usize;
355    while i < segments.len() {
356        if next.peek().is_some_and(|(s, _)| *s == i) {
357            let (_, span) = *next.next().unwrap();
358            // The terminator after the block: taken from its last line, so a
359            // block ending at EOF-without-newline stays unterminated.
360            let last_nl = segments[i + span - 1].1;
361            for (r, rl) in replacement.iter().enumerate() {
362                out.push_str(rl);
363                out.push_str(if r + 1 == replacement.len() {
364                    last_nl
365                } else {
366                    "\n"
367                });
368            }
369            let before = segments[i..i + span]
370                .iter()
371                .map(|(b, _)| *b)
372                .collect::<Vec<_>>()
373                .join("\n");
374            sites.push(Site {
375                path: path.to_string(),
376                line: i + 1,
377                before,
378                after: replacement.join("\n"),
379            });
380            i += span;
381        } else {
382            out.push_str(segments[i].0);
383            out.push_str(segments[i].1);
384            i += 1;
385        }
386    }
387
388    (out, spans.len(), sites)
389}
390
391#[cfg(test)]
392mod tests {
393    use super::*;
394
395    fn block(lines: &[&str]) -> Vec<String> {
396        lines.iter().map(|s| s.to_string()).collect()
397    }
398
399    #[test]
400    fn matches_are_byte_exact_and_non_overlapping() {
401        let lines = ["a", "a", "a"];
402        assert_eq!(find_starts(&lines, &block(&["a", "a"])), vec![0]);
403        // Whitespace is significant.
404        assert!(find_starts(&["  x"], &block(&["x"])).is_empty());
405    }
406
407    #[test]
408    fn nearest_miss_reports_first_divergence() {
409        let lines = ["fn a() {", "    one();", "    two();", "}"];
410        let b = block(&["fn a() {", "    one();", "    three();"]);
411        let m = nearest_miss(&lines, &b).unwrap();
412        assert_eq!(m.line, 1);
413        assert_eq!(m.first_diverging_line, 3);
414        assert_eq!(m.expected, "    three();");
415        assert_eq!(m.found, "    two();");
416    }
417
418    #[test]
419    fn nearest_miss_diagnoses_whitespace_drift_on_the_anchor_line() {
420        let lines = ["\tindented();"];
421        let b = block(&["    indented();"]);
422        let m = nearest_miss(&lines, &b).unwrap();
423        assert_eq!(m.line, 1);
424        assert_eq!(m.first_diverging_line, 1);
425        assert_eq!(m.found, "\tindented();");
426    }
427
428    #[test]
429    fn nearest_miss_past_eof_reports_empty_found() {
430        let lines = ["a"];
431        let b = block(&["a", "b"]);
432        let m = nearest_miss(&lines, &b).unwrap();
433        assert_eq!((m.line, m.first_diverging_line), (1, 2));
434        assert_eq!(m.found, "");
435        assert_eq!(m.block_len, 2);
436    }
437
438    #[test]
439    fn nearest_miss_carries_block_len_and_blank_line_hint() {
440        // A phantom empty line 3 in the find block diverging against real source
441        // is exactly the trailing-newline failure mode — the hint should fire.
442        let lines = ["a", "fn x(", "    body,"];
443        let b = block(&["a", "fn x(", ""]);
444        let m = nearest_miss(&lines, &b).unwrap();
445        assert_eq!(m.first_diverging_line, 3);
446        assert_eq!(m.block_len, 3);
447        assert_eq!(m.expected, "");
448        let hint = m
449            .blank_line_hint()
450            .expect("empty expected line yields a hint");
451        assert!(hint.contains("line 3 (of 3)"), "{hint}");
452
453        // A non-empty divergence is an ordinary mismatch: no blank-line hint.
454        let b2 = block(&["a", "fn y("]);
455        let m2 = nearest_miss(&lines, &b2).unwrap();
456        assert!(m2.blank_line_hint().is_none());
457    }
458
459    #[test]
460    fn block_edit_preserves_missing_final_newline() {
461        let b = block(&["x"]);
462        let (out, n, _) = edit_blocks("f", "a\nx", &b, &block(&["y", "z"]));
463        assert_eq!(out, "a\ny\nz");
464        assert_eq!(n, 1);
465    }
466
467    #[test]
468    fn block_edit_replaces_multiple_sites() {
469        let b = block(&["x"]);
470        let (out, n, sites) = edit_blocks("f", "x\nm\nx\n", &b, &block(&["y"]));
471        assert_eq!(out, "y\nm\ny\n");
472        assert_eq!(n, 2);
473        assert_eq!(sites.iter().map(|s| s.line).collect::<Vec<_>>(), vec![1, 3]);
474    }
475
476    #[test]
477    fn squeeze_matches_blank_runs_of_any_length() {
478        // Anchor with one blank line; source has two — squeezing aligns them.
479        let lines = ["foo()", "", "", "bar()"];
480        let b = block(&["foo()", "", "bar()"]);
481        // Exact matching misses (1 blank != 2 blanks).
482        assert!(find_starts(&lines, &b).is_empty());
483        // Squeezed matching spans all four source lines from index 0.
484        assert_eq!(find_spans_squeezed(&lines, &b), vec![(0, 4)]);
485        // The reverse also holds: a 2-blank anchor matches a 1-blank source.
486        let lines2 = ["foo()", "", "bar()"];
487        let b2 = block(&["foo()", "", "", "bar()"]);
488        assert_eq!(find_spans_squeezed(&lines2, &b2), vec![(0, 3)]);
489        // Whitespace-only lines count as blank.
490        let lines3 = ["a", "   ", "\t", "b"];
491        let b3 = block(&["a", "", "b"]);
492        assert_eq!(find_spans_squeezed(&lines3, &b3), vec![(0, 4)]);
493    }
494
495    #[test]
496    fn squeeze_still_requires_at_least_one_blank_and_exact_nonblank() {
497        // A blank run in the anchor needs a blank in the source: none here.
498        let lines = ["a", "b"];
499        let b = block(&["a", "", "b"]);
500        assert!(find_spans_squeezed(&lines, &b).is_empty());
501        // Non-blank lines are still byte-exact.
502        let lines2 = ["a", "", "B"];
503        let b2 = block(&["a", "", "b"]);
504        assert!(find_spans_squeezed(&lines2, &b2).is_empty());
505    }
506
507    #[test]
508    fn squeeze_edit_replaces_the_full_source_span() {
509        // Two source blanks collapse into whatever the replacement specifies;
510        // the matched span (4 lines) is what gets replaced, and the site's
511        // `before` reflects the real source, not the anchor.
512        let b = block(&["foo()", "", "bar()"]);
513        let repl = block(&["foo()", "", "bar()"]);
514        let (out, n, sites) = edit_blocks_with("f", "foo()\n\n\nbar()\nrest\n", &b, &repl, true);
515        assert_eq!(n, 1);
516        assert_eq!(out, "foo()\n\nbar()\nrest\n");
517        assert_eq!(sites[0].before, "foo()\n\n\nbar()");
518    }
519
520    #[test]
521    fn squeeze_nearest_miss_diverges_on_the_nonblank_line() {
522        // foo() and the blank run align; `baz()` diverges from `bar()`.
523        let lines = ["foo()", "", "", "bar()"];
524        let b = block(&["foo()", "", "baz()"]);
525        let m = nearest_miss_with(&lines, &b, true).unwrap();
526        assert_eq!(m.first_diverging_line, 3);
527        assert_eq!(m.expected, "baz()");
528        assert_eq!(m.found, "bar()");
529        assert_eq!(m.line, 1);
530    }
531}