Skip to main content

coding_tools/
block.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 Jonathan Shook
3
4//! Line-anchored literal block matching, shared by `ct-search`, `ct-view`,
5//! and `ct-edit`.
6//!
7//! A multi-line pattern matches as a *block*: a find block of K lines matches
8//! K consecutive source lines exactly, byte-for-byte, leading and trailing
9//! whitespace significant. When a block fails to match, [`nearest_miss`]
10//! reports the best partial alignment — the candidate with the longest
11//! matching prefix and the first diverging line — so the author sees *why*
12//! the anchor missed (whitespace drift, a comment edit, an already-applied
13//! change) without bisecting by hand.
14
/// Diagnostic describing how close an unmatched block came to matching.
///
/// Produced by [`nearest_miss`]: it pinpoints the candidate alignment and the
/// first block line that disagrees with the source.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NearestMiss {
    /// Source line, counted from 1, at which the closest candidate begins.
    pub line: usize,
    /// Position *within the block*, counted from 1, of the first line that
    /// differs from the source.
    pub first_diverging_line: usize,
    /// The block line that should have appeared at the point of divergence.
    pub expected: String,
    /// The source line actually present at that point; empty when the block
    /// runs past the end of the source.
    pub found: String,
}
27
/// Locate each non-overlapping occurrence of `block` within `lines`.
///
/// The scan runs front to back. A window of `block.len()` consecutive lines
/// counts as a hit only when every line equals its block counterpart exactly
/// (whitespace included). After a hit the scan resumes just past the matched
/// window, so hits never overlap. An empty `block`, or a `block` longer than
/// `lines`, yields no hits.
///
/// Returns the 0-based start index of every hit, in ascending order.
///
/// # Examples
///
/// ```
/// use coding_tools::block::find_starts;
///
/// let lines = ["a", "b", "c", "a", "b"];
/// let block = ["a".to_string(), "b".to_string()];
/// assert_eq!(find_starts(&lines, &block), vec![0, 3]);
/// ```
pub fn find_starts<S: AsRef<str>>(lines: &[S], block: &[String]) -> Vec<usize> {
    let height = block.len();
    let mut hits = Vec::new();
    if height == 0 {
        return hits;
    }
    let mut cursor = 0usize;
    // `get` yields `None` as soon as the window would run past the end of
    // `lines`, which also covers the case of a block taller than the source.
    while let Some(window) = lines.get(cursor..cursor + height) {
        let is_match = window
            .iter()
            .zip(block)
            .all(|(have, want)| have.as_ref() == want.as_str());
        if is_match {
            hits.push(cursor);
            // Skip the whole window so the next hit cannot overlap this one.
            cursor += height;
        } else {
            cursor += 1;
        }
    }
    hits
}
57
58/// Report the best partial alignment of an unmatched `block` against `lines`:
59/// the start with the longest run of matching leading block lines (ties go to
60/// the earliest). When no line equals the block's first line at all, falls
61/// back to a whitespace-insensitive scan of that first line, so indentation
62/// drift — the most common anchor failure — is still diagnosed.
63pub fn nearest_miss<S: AsRef<str>>(lines: &[S], block: &[String]) -> Option<NearestMiss> {
64    if block.is_empty() || lines.is_empty() {
65        return None;
66    }
67    let mut best: Option<(usize, usize)> = None; // (matched_prefix_len, start)
68    for start in 0..lines.len() {
69        if lines[start].as_ref() != block[0] {
70            continue;
71        }
72        let mut len = 0usize;
73        while len < block.len()
74            && start + len < lines.len()
75            && lines[start + len].as_ref() == block[len]
76        {
77            len += 1;
78        }
79        if best.is_none_or(|(blen, _)| len > blen) {
80            best = Some((len, start));
81        }
82    }
83    if let Some((len, start)) = best {
84        // len == block.len() would have been a match; here it is a prefix.
85        let found = lines
86            .get(start + len)
87            .map(|l| l.as_ref().to_string())
88            .unwrap_or_default();
89        return Some(NearestMiss {
90            line: start + 1,
91            first_diverging_line: len + 1,
92            expected: block.get(len).cloned().unwrap_or_default(),
93            found,
94        });
95    }
96    // No exact first-line anchor anywhere: diagnose whitespace drift on the
97    // first line if a trim-equal candidate exists.
98    let want = block[0].trim();
99    if want.is_empty() {
100        return None;
101    }
102    lines
103        .iter()
104        .position(|l| l.as_ref().trim() == want)
105        .map(|i| NearestMiss {
106            line: i + 1,
107            first_diverging_line: 1,
108            expected: block[0].clone(),
109            found: lines[i].as_ref().to_string(),
110        })
111}
112
113use crate::edit::Site;
114
115/// Replace every non-overlapping occurrence of `block` in `content` with
116/// `replacement` lines, preserving every untouched byte (including a missing
117/// final newline). An empty `replacement` deletes the matched lines entirely.
118/// Returns the new content, the occurrence count, and the changed sites
119/// (`line` is the block's 1-based start; `before`/`after` are newline-joined).
120///
121/// # Examples
122///
123/// ```
124/// use coding_tools::block::edit_blocks;
125///
126/// let block = vec!["b".to_string(), "c".to_string()];
127/// let repl = vec!["X".to_string()];
128/// let (out, n, sites) = edit_blocks("f", "a\nb\nc\nd\n", &block, &repl);
129/// assert_eq!(out, "a\nX\nd\n");
130/// assert_eq!(n, 1);
131/// assert_eq!(sites[0].line, 2);
132///
133/// // Empty replacement deletes the block's lines.
134/// let (out, _, _) = edit_blocks("f", "a\nb\nc\nd\n", &block, &[]);
135/// assert_eq!(out, "a\nd\n");
136/// ```
137pub fn edit_blocks(
138    path: &str,
139    content: &str,
140    block: &[String],
141    replacement: &[String],
142) -> (String, usize, Vec<Site>) {
143    // Split into (body, terminator) per line so untouched bytes round-trip.
144    let segments: Vec<(&str, &str)> = content
145        .split_inclusive('\n')
146        .map(|seg| match seg.strip_suffix('\n') {
147            Some(b) => (b, "\n"),
148            None => (seg, ""),
149        })
150        .collect();
151    let bodies: Vec<&str> = segments.iter().map(|(b, _)| *b).collect();
152    let starts = find_starts(&bodies, block);
153    if starts.is_empty() {
154        return (content.to_string(), 0, Vec::new());
155    }
156
157    let mut out = String::with_capacity(content.len());
158    let mut sites = Vec::new();
159    let mut next = starts.iter().peekable();
160    let mut i = 0usize;
161    while i < segments.len() {
162        if next.peek() == Some(&&i) {
163            next.next();
164            // The terminator after the block: taken from its last line, so a
165            // block ending at EOF-without-newline stays unterminated.
166            let last_nl = segments[i + block.len() - 1].1;
167            for (r, rl) in replacement.iter().enumerate() {
168                out.push_str(rl);
169                out.push_str(if r + 1 == replacement.len() { last_nl } else { "\n" });
170            }
171            sites.push(Site {
172                path: path.to_string(),
173                line: i + 1,
174                before: block.join("\n"),
175                after: replacement.join("\n"),
176            });
177            i += block.len();
178        } else {
179            out.push_str(segments[i].0);
180            out.push_str(segments[i].1);
181            i += 1;
182        }
183    }
184
185    (out, starts.len(), sites)
186}
187
#[cfg(test)]
mod tests {
    use super::*;

    /// Turn string literals into the owned line vector the block APIs take.
    fn owned(lines: &[&str]) -> Vec<String> {
        lines.iter().copied().map(String::from).collect()
    }

    #[test]
    fn matches_are_byte_exact_and_non_overlapping() {
        // Three identical lines hold only one non-overlapping two-line match.
        let source = ["a", "a", "a"];
        let starts = find_starts(&source, &owned(&["a", "a"]));
        assert_eq!(starts, vec![0]);

        // Whitespace is significant.
        let indented = find_starts(&["  x"], &owned(&["x"]));
        assert!(indented.is_empty());
    }

    #[test]
    fn nearest_miss_reports_first_divergence() {
        let source = ["fn a() {", "    one();", "    two();", "}"];
        let wanted = owned(&["fn a() {", "    one();", "    three();"]);
        let miss = nearest_miss(&source, &wanted).unwrap();
        assert_eq!(
            miss,
            NearestMiss {
                line: 1,
                first_diverging_line: 3,
                expected: "    three();".to_string(),
                found: "    two();".to_string(),
            }
        );
    }

    #[test]
    fn nearest_miss_diagnoses_whitespace_drift_on_the_anchor_line() {
        // Tab in the source, spaces in the block: only a trimmed comparison
        // can pair them up.
        let source = ["\tindented();"];
        let wanted = owned(&["    indented();"]);
        let miss = nearest_miss(&source, &wanted).unwrap();
        assert_eq!((miss.line, miss.first_diverging_line), (1, 1));
        assert_eq!(miss.found, "\tindented();");
    }

    #[test]
    fn nearest_miss_past_eof_reports_empty_found() {
        // The block is one line longer than the source.
        let source = ["a"];
        let wanted = owned(&["a", "b"]);
        let miss = nearest_miss(&source, &wanted).unwrap();
        assert_eq!(miss.line, 1);
        assert_eq!(miss.first_diverging_line, 2);
        assert_eq!(miss.found, "");
    }

    #[test]
    fn block_edit_preserves_missing_final_newline() {
        let find = owned(&["x"]);
        let replace = owned(&["y", "z"]);
        let (text, count, _) = edit_blocks("f", "a\nx", &find, &replace);
        assert_eq!(text, "a\ny\nz");
        assert_eq!(count, 1);
    }

    #[test]
    fn block_edit_replaces_multiple_sites() {
        let find = owned(&["x"]);
        let replace = owned(&["y"]);
        let (text, count, sites) = edit_blocks("f", "x\nm\nx\n", &find, &replace);
        assert_eq!(text, "y\nm\ny\n");
        assert_eq!(count, 2);
        let site_lines: Vec<usize> = sites.iter().map(|site| site.line).collect();
        assert_eq!(site_lines, vec![1, 3]);
    }
}