Skip to main content

coding_tools/
block.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 Jonathan Shook
3
4//! Line-anchored literal block matching, shared by `ct-search`, `ct-view`,
5//! and `ct-edit`.
6//!
7//! A multi-line pattern matches as a *block*: a find block of K lines matches
8//! K consecutive source lines exactly, byte-for-byte, leading and trailing
9//! whitespace significant. When a block fails to match, [`nearest_miss`]
10//! reports the best partial alignment — the candidate with the longest
11//! matching prefix and the first diverging line — so the author sees *why*
12//! the anchor missed (whitespace drift, a comment edit, an already-applied
13//! change) without bisecting by hand.
14
15/// The best partial alignment of a block that did not match.
16#[derive(Debug, Clone, PartialEq, Eq)]
17pub struct NearestMiss {
18    /// 1-based source line where the best candidate alignment starts.
19    pub line: usize,
20    /// 1-based index *into the block* of the first diverging line.
21    pub first_diverging_line: usize,
22    /// The block line that was expected at the divergence.
23    pub expected: String,
24    /// The source line actually found there (empty past end of file).
25    pub found: String,
26}
27
28/// Find every non-overlapping occurrence of `block` in `lines`, scanning
29/// forward. Returns the 0-based start indices.
30///
31/// # Examples
32///
33/// ```
34/// use coding_tools::block::find_starts;
35///
36/// let lines = ["a", "b", "c", "a", "b"];
37/// let block = ["a".to_string(), "b".to_string()];
38/// assert_eq!(find_starts(&lines, &block), vec![0, 3]);
39/// ```
40pub fn find_starts<S: AsRef<str>>(lines: &[S], block: &[String]) -> Vec<usize> {
41    let k = block.len();
42    if k == 0 || lines.len() < k {
43        return Vec::new();
44    }
45    let mut starts = Vec::new();
46    let mut i = 0usize;
47    while i + k <= lines.len() {
48        if block
49            .iter()
50            .zip(&lines[i..i + k])
51            .all(|(b, l)| b == l.as_ref())
52        {
53            starts.push(i);
54            i += k; // non-overlapping: continue past the match
55        } else {
56            i += 1;
57        }
58    }
59    starts
60}
61
62/// Report the best partial alignment of an unmatched `block` against `lines`:
63/// the start with the longest run of matching leading block lines (ties go to
64/// the earliest). When no line equals the block's first line at all, falls
65/// back to a whitespace-insensitive scan of that first line, so indentation
66/// drift — the most common anchor failure — is still diagnosed.
67pub fn nearest_miss<S: AsRef<str>>(lines: &[S], block: &[String]) -> Option<NearestMiss> {
68    if block.is_empty() || lines.is_empty() {
69        return None;
70    }
71    let mut best: Option<(usize, usize)> = None; // (matched_prefix_len, start)
72    for start in 0..lines.len() {
73        if lines[start].as_ref() != block[0] {
74            continue;
75        }
76        let mut len = 0usize;
77        while len < block.len()
78            && start + len < lines.len()
79            && lines[start + len].as_ref() == block[len]
80        {
81            len += 1;
82        }
83        if best.is_none_or(|(blen, _)| len > blen) {
84            best = Some((len, start));
85        }
86    }
87    if let Some((len, start)) = best {
88        // len == block.len() would have been a match; here it is a prefix.
89        let found = lines
90            .get(start + len)
91            .map(|l| l.as_ref().to_string())
92            .unwrap_or_default();
93        return Some(NearestMiss {
94            line: start + 1,
95            first_diverging_line: len + 1,
96            expected: block.get(len).cloned().unwrap_or_default(),
97            found,
98        });
99    }
100    // No exact first-line anchor anywhere: diagnose whitespace drift on the
101    // first line if a trim-equal candidate exists.
102    let want = block[0].trim();
103    if want.is_empty() {
104        return None;
105    }
106    lines
107        .iter()
108        .position(|l| l.as_ref().trim() == want)
109        .map(|i| NearestMiss {
110            line: i + 1,
111            first_diverging_line: 1,
112            expected: block[0].clone(),
113            found: lines[i].as_ref().to_string(),
114        })
115}
116
117use crate::edit::Site;
118
119/// Replace every non-overlapping occurrence of `block` in `content` with
120/// `replacement` lines, preserving every untouched byte (including a missing
121/// final newline). An empty `replacement` deletes the matched lines entirely.
122/// Returns the new content, the occurrence count, and the changed sites
123/// (`line` is the block's 1-based start; `before`/`after` are newline-joined).
124///
125/// # Examples
126///
127/// ```
128/// use coding_tools::block::edit_blocks;
129///
130/// let block = vec!["b".to_string(), "c".to_string()];
131/// let repl = vec!["X".to_string()];
132/// let (out, n, sites) = edit_blocks("f", "a\nb\nc\nd\n", &block, &repl);
133/// assert_eq!(out, "a\nX\nd\n");
134/// assert_eq!(n, 1);
135/// assert_eq!(sites[0].line, 2);
136///
137/// // Empty replacement deletes the block's lines.
138/// let (out, _, _) = edit_blocks("f", "a\nb\nc\nd\n", &block, &[]);
139/// assert_eq!(out, "a\nd\n");
140/// ```
141pub fn edit_blocks(
142    path: &str,
143    content: &str,
144    block: &[String],
145    replacement: &[String],
146) -> (String, usize, Vec<Site>) {
147    // Split into (body, terminator) per line so untouched bytes round-trip.
148    let segments: Vec<(&str, &str)> = content
149        .split_inclusive('\n')
150        .map(|seg| match seg.strip_suffix('\n') {
151            Some(b) => (b, "\n"),
152            None => (seg, ""),
153        })
154        .collect();
155    let bodies: Vec<&str> = segments.iter().map(|(b, _)| *b).collect();
156    let starts = find_starts(&bodies, block);
157    if starts.is_empty() {
158        return (content.to_string(), 0, Vec::new());
159    }
160
161    let mut out = String::with_capacity(content.len());
162    let mut sites = Vec::new();
163    let mut next = starts.iter().peekable();
164    let mut i = 0usize;
165    while i < segments.len() {
166        if next.peek() == Some(&&i) {
167            next.next();
168            // The terminator after the block: taken from its last line, so a
169            // block ending at EOF-without-newline stays unterminated.
170            let last_nl = segments[i + block.len() - 1].1;
171            for (r, rl) in replacement.iter().enumerate() {
172                out.push_str(rl);
173                out.push_str(if r + 1 == replacement.len() {
174                    last_nl
175                } else {
176                    "\n"
177                });
178            }
179            sites.push(Site {
180                path: path.to_string(),
181                line: i + 1,
182                before: block.join("\n"),
183                after: replacement.join("\n"),
184            });
185            i += block.len();
186        } else {
187            out.push_str(segments[i].0);
188            out.push_str(segments[i].1);
189            i += 1;
190        }
191    }
192
193    (out, starts.len(), sites)
194}
195
196#[cfg(test)]
197mod tests {
198    use super::*;
199
200    fn block(lines: &[&str]) -> Vec<String> {
201        lines.iter().map(|s| s.to_string()).collect()
202    }
203
204    #[test]
205    fn matches_are_byte_exact_and_non_overlapping() {
206        let lines = ["a", "a", "a"];
207        assert_eq!(find_starts(&lines, &block(&["a", "a"])), vec![0]);
208        // Whitespace is significant.
209        assert!(find_starts(&["  x"], &block(&["x"])).is_empty());
210    }
211
212    #[test]
213    fn nearest_miss_reports_first_divergence() {
214        let lines = ["fn a() {", "    one();", "    two();", "}"];
215        let b = block(&["fn a() {", "    one();", "    three();"]);
216        let m = nearest_miss(&lines, &b).unwrap();
217        assert_eq!(m.line, 1);
218        assert_eq!(m.first_diverging_line, 3);
219        assert_eq!(m.expected, "    three();");
220        assert_eq!(m.found, "    two();");
221    }
222
223    #[test]
224    fn nearest_miss_diagnoses_whitespace_drift_on_the_anchor_line() {
225        let lines = ["\tindented();"];
226        let b = block(&["    indented();"]);
227        let m = nearest_miss(&lines, &b).unwrap();
228        assert_eq!(m.line, 1);
229        assert_eq!(m.first_diverging_line, 1);
230        assert_eq!(m.found, "\tindented();");
231    }
232
233    #[test]
234    fn nearest_miss_past_eof_reports_empty_found() {
235        let lines = ["a"];
236        let b = block(&["a", "b"]);
237        let m = nearest_miss(&lines, &b).unwrap();
238        assert_eq!((m.line, m.first_diverging_line), (1, 2));
239        assert_eq!(m.found, "");
240    }
241
242    #[test]
243    fn block_edit_preserves_missing_final_newline() {
244        let b = block(&["x"]);
245        let (out, n, _) = edit_blocks("f", "a\nx", &b, &block(&["y", "z"]));
246        assert_eq!(out, "a\ny\nz");
247        assert_eq!(n, 1);
248    }
249
250    #[test]
251    fn block_edit_replaces_multiple_sites() {
252        let b = block(&["x"]);
253        let (out, n, sites) = edit_blocks("f", "x\nm\nx\n", &b, &block(&["y"]));
254        assert_eq!(out, "y\nm\ny\n");
255        assert_eq!(n, 2);
256        assert_eq!(sites.iter().map(|s| s.line).collect::<Vec<_>>(), vec![1, 3]);
257    }
258}