Skip to main content

recast_core/
plan.rs

1//! End-to-end planning: walk → compile → rewrite → guard → check
2//! convergence.
3//!
4//! [`plan_rewrite`] is the single entry point most callers want. It
5//! produces a [`Plan`] describing every file that would change without
6//! touching the filesystem; pass the plan to
7//! [`crate::apply_changes`] to commit.
8
9use std::fs::{self, Permissions};
10use std::path::{Path, PathBuf};
11
12use rayon::prelude::*;
13use tracing::{debug, trace};
14
15use crate::error::{Error, IoCtx, Result};
16use crate::pattern::{CompiledPattern, PatternOptions};
17#[cfg(feature = "script")]
18use crate::rewrite::rewrite_text_scripted;
19use crate::rewrite::{RewriteOutcome, label_for_path, rewrite_text, unified_diff};
20#[cfg(feature = "script")]
21use crate::script::ScriptRewriter;
22use crate::walker::{WalkOptions, walk_paths};
23
24/// Knobs controlling a single [`plan_rewrite`] invocation.
25///
26/// Defaults are tuned for safety-by-default LLM use: `at_least = Some(1)`
27/// makes a silent zero-match impossible, `max_bytes = 10 MiB`, and
28/// `max_files = 1000` keep runaway pattern matches in check.
29#[derive(Debug, Clone)]
30pub struct PlanOptions {
31    pub pattern_options: PatternOptions,
32    pub walk_options: WalkOptions,
33    /// Inclusive lower bound on total matches across all files. `None`
34    /// disables the guard; `Some(0)` accepts zero-match runs explicitly.
35    pub at_least: Option<usize>,
36    /// Inclusive upper bound on total matches. `None` = unbounded.
37    pub at_most: Option<usize>,
38    /// Skip the convergence (idempotency) check. Off by default — a
39    /// pattern like `a` → `aa` is rejected so re-runs cannot accidentally
40    /// grow the file.
41    pub allow_non_convergent: bool,
42    /// Skip the post-rewrite syntax-regression guard. Off by default —
43    /// a rewrite whose output introduces *new* tree-sitter parse errors
44    /// (relative to the pre-image) is rejected. Only files whose
45    /// extension maps to a compiled grammar are checked; everything
46    /// else passes through unguarded.
47    pub allow_syntax_errors: bool,
48    /// Refuse to read any file larger than this many bytes.
49    pub max_bytes: u64,
50    /// Refuse to plan if the walk turns up more files than this.
51    pub max_files: usize,
52}
53
54impl Default for PlanOptions {
55    fn default() -> Self {
56        Self {
57            pattern_options: PatternOptions::default(),
58            walk_options: WalkOptions::default(),
59            at_least: Some(1),
60            at_most: None,
61            allow_non_convergent: false,
62            allow_syntax_errors: false,
63            max_bytes: 10 * 1024 * 1024,
64            max_files: 1000,
65        }
66    }
67}
68
/// The pending rewrite for a single file.
///
/// Only the post-image is kept: the pre-image is dropped once the
/// diff has been rendered, because [`crate::apply_changes`] writes
/// from `after` rather than re-reading the original, and holding both
/// images would double the planner's peak memory. The permissions are
/// taken from the planner's own metadata read so that
/// [`crate::apply_changes`] does not need a second `fs::metadata`
/// call just to preserve the mode bits.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct FileChange {
    /// Path of the file that would be rewritten.
    pub path: PathBuf,
    /// Number of matches the rewrite reported for this file.
    pub matches: usize,
    /// Complete post-rewrite contents (not serialized).
    #[cfg_attr(feature = "serde", serde(skip))]
    pub after: String,
    /// Pre-rendered unified diff between the pre- and post-image.
    pub diff: String,
    /// Permissions captured while planning (not serialized).
    #[cfg_attr(feature = "serde", serde(skip))]
    pub permissions: Option<Permissions>,
}
89
/// How a finished [`Plan`] is classified as a whole.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
pub enum PlanOutcome {
    /// At least one file would be rewritten.
    Changes,
    /// The whole scan produced zero matches *and* the pattern is
    /// convergent (applying it again to its own replacement changes
    /// nothing further), so the run counts as a successful no-op
    /// instead of a guard violation.
    AlreadyApplied,
}
104
105/// Output of [`plan_rewrite`]. Pass to [`crate::apply_changes`] to commit.
106#[derive(Debug, Clone)]
107#[cfg_attr(feature = "serde", derive(serde::Serialize))]
108pub struct Plan {
109    pub changes: Vec<FileChange>,
110    pub total_matches: usize,
111    pub files_scanned: usize,
112    pub outcome: PlanOutcome,
113}
114
115/// Walk `roots`, compile `pattern`, and produce a [`Plan`] of every file
116/// that would change when `replacement` is substituted. Honors the
117/// match-count guard, the convergence check, and the file/byte limits in
118/// `opts`. No filesystem writes happen here.
119pub fn plan_rewrite<P: AsRef<Path>>(
120    pattern: &str,
121    replacement: &str,
122    roots: &[P],
123    opts: &PlanOptions,
124) -> Result<Plan> {
125    let compiled = CompiledPattern::compile(pattern, replacement, &opts.pattern_options)?;
126    debug!(pattern, "compiled regex");
127    let files = scan(roots, opts)?;
128    let files_scanned = files.len();
129
130    let results: Vec<Result<Option<FileChange>>> = files
131        .par_iter()
132        .map(|path| {
133            process_one(
134                &compiled,
135                path,
136                opts,
137                |p, s| Ok(rewrite_text(p, s)),
138                regex_convergence_check,
139            )
140        })
141        .collect();
142    let changes = collect_changes(results)?;
143    finalize_plan(changes, compiled.is_convergent(), files_scanned, opts)
144}
145
146fn regex_convergence_check(pattern: &CompiledPattern, after: &str) -> Result<usize> {
147    Ok(pattern.regex().find_iter(after).count())
148}
149
/// Scripted counterpart of [`plan_rewrite`]: every match is handed to
/// a Rhai script callback rather than a static template. No static
/// replacement is used — the pattern is compiled with an empty one.
///
/// `map_init` supplies each rayon job with its own sandboxed rewriter
/// obtained from [`ScriptRewriter::fresh`], because the Rhai `Engine`
/// is `!Sync`; the compiled AST is shared by reference.
#[cfg(feature = "script")]
pub fn plan_rewrite_scripted<P: AsRef<Path>>(
    pattern: &str,
    script: &ScriptRewriter,
    roots: &[P],
    opts: &PlanOptions,
) -> Result<Plan> {
    let compiled = CompiledPattern::compile(pattern, "", &opts.pattern_options)?;
    debug!(pattern, "compiled regex (scripted)");

    let candidates = scan(roots, opts)?;
    let files_scanned = candidates.len();

    let per_file: Vec<Result<Option<FileChange>>> = candidates
        .par_iter()
        .map_init(
            || script.fresh(),
            |worker, path| {
                let apply_script =
                    |p: &CompiledPattern, text: &str| rewrite_text_scripted(p, worker, text);
                // Dynamic convergence probe: run the script a second
                // time over the post-image and report its match count
                // only if that second pass would alter the text again.
                let rerun_script = |p: &CompiledPattern, text: &str| -> Result<usize> {
                    let second_pass = rewrite_text_scripted(p, worker, text)?;
                    if second_pass.after == text {
                        Ok(0)
                    } else {
                        Ok(second_pass.matches)
                    }
                };
                process_one(&compiled, path, opts, apply_script, rerun_script)
            },
        )
        .collect();

    let changes = collect_changes(per_file)?;
    // A script cannot be probed statically, so rely on the per-file
    // dynamic convergence check in `process_one` and classify a
    // zero-match run as an already-applied no-op.
    finalize_plan(changes, true, files_scanned, opts)
}
189
190fn scan<P: AsRef<Path>>(roots: &[P], opts: &PlanOptions) -> Result<Vec<PathBuf>> {
191    let files = walk_paths(roots, &opts.walk_options)?;
192    debug!(files_scanned = files.len(), "walk completed");
193    if files.len() > opts.max_files {
194        return Err(Error::TooManyFiles { count: files.len(), limit: opts.max_files });
195    }
196    Ok(files)
197}
198
199fn collect_changes(results: Vec<Result<Option<FileChange>>>) -> Result<Vec<FileChange>> {
200    let mut changes = Vec::new();
201    for r in results {
202        if let Some(change) = r? {
203            changes.push(change);
204        }
205    }
206    Ok(changes)
207}
208
209fn finalize_plan(
210    changes: Vec<FileChange>,
211    convergent_or_scripted: bool,
212    files_scanned: usize,
213    opts: &PlanOptions,
214) -> Result<Plan> {
215    let total_matches: usize = changes.iter().map(|c| c.matches).sum();
216    debug!(files_changed = changes.len(), total_matches, "rewrite plan ready");
217
218    if total_matches == 0 && convergent_or_scripted {
219        debug!("already applied (zero matches)");
220        return Ok(Plan {
221            changes: Vec::new(),
222            total_matches: 0,
223            files_scanned,
224            outcome: PlanOutcome::AlreadyApplied,
225        });
226    }
227
228    check_match_counts(total_matches, opts.at_least, opts.at_most)?;
229
230    Ok(Plan { changes, total_matches, files_scanned, outcome: PlanOutcome::Changes })
231}
232
233/// Enforce the `--at-least` / `--at-most` match-count guard. Returns
234/// [`Error::TooFewMatches`] / [`Error::TooManyMatches`] when the
235/// guard is violated; both variants map to the
236/// `EXIT_GUARD_VIOLATED` (2) exit code at the binary boundary.
237pub fn check_match_counts(
238    found: usize,
239    at_least: Option<usize>,
240    at_most: Option<usize>,
241) -> Result<()> {
242    if let Some(min) = at_least
243        && found < min
244    {
245        return Err(Error::TooFewMatches { found, required: min });
246    }
247    if let Some(max) = at_most
248        && found > max
249    {
250        return Err(Error::TooManyMatches { found, allowed: max });
251    }
252    Ok(())
253}
254
255fn process_one<R, C>(
256    pattern: &CompiledPattern,
257    path: &Path,
258    opts: &PlanOptions,
259    rewrite: R,
260    convergence_check: C,
261) -> Result<Option<FileChange>>
262where
263    R: Fn(&CompiledPattern, &str) -> Result<RewriteOutcome>,
264    C: Fn(&CompiledPattern, &str) -> Result<usize>,
265{
266    let (before, permissions) = match read_text_or_skip_binary(path, opts.max_bytes)? {
267        Some(pair) => pair,
268        None => return Ok(None),
269    };
270
271    let outcome = rewrite(pattern, &before)?;
272    if outcome.matches == 0 || outcome.after == before {
273        return Ok(None);
274    }
275    trace!(path = %path.display(), matches = outcome.matches, "file would change");
276
277    if !opts.allow_non_convergent {
278        let extra = convergence_check(pattern, &outcome.after)?;
279        if extra > 0 {
280            return Err(Error::NonConvergent { path: path.to_path_buf(), extra });
281        }
282    }
283
284    #[cfg(any(
285        feature = "lang-rust",
286        feature = "lang-ts",
287        feature = "lang-js",
288        feature = "lang-python",
289    ))]
290    if !opts.allow_syntax_errors {
291        crate::structural::guard_syntax(path, &before, &outcome.after)?;
292    }
293
294    let label = label_for_path(path);
295    let diff = unified_diff(&label, &before, &outcome.after);
296    Ok(Some(FileChange {
297        path: path.to_path_buf(),
298        matches: outcome.matches,
299        after: outcome.after,
300        diff,
301        permissions: Some(permissions),
302    }))
303}
304
305/// Read a candidate file, enforce the per-file byte limit, and yield
306/// `None` for paths whose contents aren't valid UTF-8 (binary skip).
307/// Returns the file contents alongside the permissions captured from
308/// the same metadata call so the commit phase doesn't have to stat the
309/// file again. Shared by the regex and structural pipelines.
310pub(crate) fn read_text_or_skip_binary(
311    path: &Path,
312    max_bytes: u64,
313) -> Result<Option<(String, Permissions)>> {
314    let metadata = fs::metadata(path).io_ctx(path)?;
315    if metadata.len() > max_bytes {
316        return Err(Error::FileTooLarge {
317            path: path.to_path_buf(),
318            size: metadata.len(),
319            limit: max_bytes,
320        });
321    }
322    let permissions = metadata.permissions();
323    match fs::read_to_string(path) {
324        Ok(s) => Ok(Some((s, permissions))),
325        Err(e) if e.kind() == std::io::ErrorKind::InvalidData => Ok(None),
326        Err(e) => Err(Error::Io { path: path.to_path_buf(), source: e }),
327    }
328}
329
330#[cfg(test)]
331#[path = "plan_tests.rs"]
332mod tests;