use std::fs;
use std::path::{Path, PathBuf};
use rayon::prelude::*;
use tracing::{debug, trace};
use crate::error::{Error, IoCtx, Result};
use crate::pattern::{CompiledPattern, PatternOptions};
#[cfg(feature = "script")]
use crate::rewrite::rewrite_text_scripted;
use crate::rewrite::{RewriteOutcome, label_for_path, rewrite_text, unified_diff};
#[cfg(feature = "script")]
use crate::script::ScriptRewriter;
use crate::walker::{WalkOptions, walk_paths};
/// Settings that control how a rewrite plan is built.
#[derive(Debug, Clone)]
pub struct PlanOptions {
/// Options forwarded to `CompiledPattern::compile` when the pattern is compiled.
pub pattern_options: PatternOptions,
/// Options forwarded to `walk_paths` when the roots are walked.
pub walk_options: WalkOptions,
/// Lower bound on the total match count passed to `check_match_counts`;
/// `None` disables the lower bound.
pub at_least: Option<usize>,
/// Upper bound on the total match count passed to `check_match_counts`;
/// `None` disables the upper bound.
pub at_most: Option<usize>,
/// When `false`, each changed file's rewritten text is run through a
/// convergence check and a non-zero result fails with `Error::NonConvergent`.
/// When `true`, that check is skipped.
pub allow_non_convergent: bool,
/// Largest file size (as reported by file metadata) that will be read;
/// larger files fail with `Error::FileTooLarge`.
pub max_bytes: u64,
/// Largest number of walked files accepted; exceeding it fails with
/// `Error::TooManyFiles`.
pub max_files: usize,
}
impl Default for PlanOptions {
    /// Returns the default planning options.
    ///
    /// Defaults: nested pattern and walk options take their own `Default`
    /// values, at least one match is required, there is no upper match bound,
    /// non-convergent rewrites are rejected, files are limited to 10 MiB and
    /// a walk is limited to 1000 files.
    fn default() -> Self {
        // Size cap applied to each file before it is read: 10 MiB.
        const DEFAULT_MAX_BYTES: u64 = 10 * 1024 * 1024;
        // Cap on the number of files a single walk may produce.
        const DEFAULT_MAX_FILES: usize = 1000;

        let pattern_options = PatternOptions::default();
        let walk_options = WalkOptions::default();

        Self {
            pattern_options,
            walk_options,
            at_least: Some(1),
            at_most: None,
            allow_non_convergent: false,
            max_bytes: DEFAULT_MAX_BYTES,
            max_files: DEFAULT_MAX_FILES,
        }
    }
}
/// The planned rewrite of a single file.
///
/// Serializable when the `serde` feature is enabled.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct FileChange {
/// Path of the file that would change.
pub path: PathBuf,
/// Number of matches reported by the rewrite for this file.
pub matches: usize,
/// Full text of the file after the rewrite. Skipped during serialization
/// when the `serde` feature is enabled.
#[cfg_attr(feature = "serde", serde(skip))]
pub after: String,
/// Output of `unified_diff` between the original and rewritten text.
pub diff: String,
}
/// Overall result classification of a plan.
///
/// With the `serde` feature enabled, variants serialize in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
pub enum PlanOutcome {
/// The plan passed the match-count checks and carries the collected changes.
Changes,
/// No matches were found and the rewrite is treated as convergent (or is
/// scripted), so the plan carries no changes.
AlreadyApplied,
}
/// The result of planning a rewrite across a set of files.
///
/// Serializable when the `serde` feature is enabled.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct Plan {
/// One entry per file that would change.
pub changes: Vec<FileChange>,
/// Sum of `matches` over all entries in `changes`.
pub total_matches: usize,
/// Number of files returned by the walk, whether or not they changed.
pub files_scanned: usize,
/// Whether the plan carries changes or was found to be already applied.
pub outcome: PlanOutcome,
}
pub fn plan_rewrite<P: AsRef<Path>>(
pattern: &str,
replacement: &str,
roots: &[P],
opts: &PlanOptions,
) -> Result<Plan> {
let compiled = CompiledPattern::compile(pattern, replacement, &opts.pattern_options)?;
debug!(pattern, "compiled regex");
let files = scan(roots, opts)?;
let files_scanned = files.len();
let results: Vec<Result<Option<FileChange>>> = files
.par_iter()
.map(|path| {
process_one(
&compiled,
path,
opts,
|p, s| Ok(rewrite_text(p, s)),
regex_convergence_check,
)
})
.collect();
let changes = collect_changes(results)?;
finalize_plan(changes, compiled.is_convergent(), files_scanned, opts)
}
/// Counts how many times the compiled regex still matches in `after`.
///
/// Used as the convergence check for plain regex rewrites: a non-zero count
/// means the rewritten text still contains matches. This check itself never
/// fails; the `Result` exists to fit the callback shape `process_one` expects.
fn regex_convergence_check(pattern: &CompiledPattern, after: &str) -> Result<usize> {
    let remaining = pattern.regex().find_iter(after).count();
    Ok(remaining)
}
/// Plans a script-driven rewrite of every file found under `roots`.
///
/// Only available with the `script` feature. The pattern is compiled with an
/// empty replacement because the replacement text comes from `script`. Files
/// are processed in parallel with rayon's `map_init`, which calls
/// `script.fresh()` to create the worker state handed to the per-file closure.
///
/// The convergence check re-runs the script on the rewritten text: if that
/// second pass would change the text again, its match count is reported as
/// the residual; otherwise the residual is zero.
///
/// # Errors
///
/// Returns an error if the pattern fails to compile, if the walk fails or
/// yields more than `opts.max_files` files, if any file fails in
/// `process_one` (including script failures), or if the total match count
/// violates `opts.at_least` / `opts.at_most`.
#[cfg(feature = "script")]
pub fn plan_rewrite_scripted<P: AsRef<Path>>(
    pattern: &str,
    script: &ScriptRewriter,
    roots: &[P],
    opts: &PlanOptions,
) -> Result<Plan> {
    let compiled = CompiledPattern::compile(pattern, "", &opts.pattern_options)?;
    debug!(pattern, "compiled regex (scripted)");

    let files = scan(roots, opts)?;
    let files_scanned = files.len();

    let results: Vec<Result<Option<FileChange>>> = files
        .par_iter()
        .map_init(
            || script.fresh(),
            |worker, path| {
                let apply_script =
                    |p: &CompiledPattern, s: &str| rewrite_text_scripted(p, worker, s);
                let residual_matches = |p: &CompiledPattern, s: &str| -> Result<usize> {
                    let second_pass = rewrite_text_scripted(p, worker, s)?;
                    // A second pass that leaves the text untouched counts as converged,
                    // regardless of how many matches it reported.
                    if second_pass.after == s {
                        Ok(0)
                    } else {
                        Ok(second_pass.matches)
                    }
                };
                process_one(&compiled, path, opts, apply_script, residual_matches)
            },
        )
        .collect();

    let changes = collect_changes(results)?;
    // Scripted rewrites are always finalized as convergent, so zero matches
    // means "already applied".
    finalize_plan(changes, true, files_scanned, opts)
}
fn scan<P: AsRef<Path>>(roots: &[P], opts: &PlanOptions) -> Result<Vec<PathBuf>> {
let files = walk_paths(roots, &opts.walk_options)?;
debug!(files_scanned = files.len(), "walk completed");
if files.len() > opts.max_files {
return Err(Error::TooManyFiles { count: files.len(), limit: opts.max_files });
}
Ok(files)
}
/// Flattens per-file results into the list of actual changes.
///
/// `Ok(None)` entries (files that would not change) are dropped, `Ok(Some(_))`
/// entries are kept in order, and the first `Err` in `results` is returned
/// as the overall error.
fn collect_changes(results: Vec<Result<Option<FileChange>>>) -> Result<Vec<FileChange>> {
    results
        .into_iter()
        // `transpose` turns `Ok(None)` into `None` (filtered out) while keeping
        // both `Ok(Some(change))` and `Err(e)` for the fallible collect below.
        .filter_map(|outcome| outcome.transpose())
        .collect()
}
/// Turns the collected per-file changes into a final `Plan`.
///
/// When there are zero total matches and `convergent_or_scripted` is `true`,
/// the plan is reported as `PlanOutcome::AlreadyApplied` with no changes and
/// the match-count bounds are not consulted. Otherwise the total is validated
/// against `opts.at_least` / `opts.at_most` and the plan is reported as
/// `PlanOutcome::Changes`.
///
/// # Errors
///
/// Returns the error from `check_match_counts` when the total match count is
/// outside the configured bounds.
fn finalize_plan(
    changes: Vec<FileChange>,
    convergent_or_scripted: bool,
    files_scanned: usize,
    opts: &PlanOptions,
) -> Result<Plan> {
    let total_matches = changes.iter().map(|change| change.matches).sum::<usize>();
    debug!(files_changed = changes.len(), total_matches, "rewrite plan ready");

    let nothing_to_do = convergent_or_scripted && total_matches == 0;
    if !nothing_to_do {
        check_match_counts(total_matches, opts.at_least, opts.at_most)?;
        return Ok(Plan {
            changes,
            total_matches,
            files_scanned,
            outcome: PlanOutcome::Changes,
        });
    }

    debug!("already applied (zero matches)");
    Ok(Plan {
        changes: Vec::new(),
        total_matches: 0,
        files_scanned,
        outcome: PlanOutcome::AlreadyApplied,
    })
}
pub fn check_match_counts(
found: usize,
at_least: Option<usize>,
at_most: Option<usize>,
) -> Result<()> {
if let Some(min) = at_least
&& found < min
{
return Err(Error::TooFewMatches { found, required: min });
}
if let Some(max) = at_most
&& found > max
{
return Err(Error::TooManyMatches { found, allowed: max });
}
Ok(())
}
/// Plans the rewrite of a single file.
///
/// Reads the file, applies `rewrite`, and returns `Ok(None)` when the file is
/// skipped by `read_text_or_skip_binary`, has no matches, or would be left
/// textually unchanged. Otherwise returns a `FileChange` carrying the
/// rewritten text and a unified diff.
///
/// Unless `opts.allow_non_convergent` is set, `convergence_check` is run on
/// the rewritten text and any non-zero result rejects the file.
///
/// # Errors
///
/// Propagates errors from reading the file, from `rewrite`, and from
/// `convergence_check`; returns `Error::NonConvergent` when the convergence
/// check reports residual matches.
fn process_one<R, C>(
    pattern: &CompiledPattern,
    path: &Path,
    opts: &PlanOptions,
    rewrite: R,
    convergence_check: C,
) -> Result<Option<FileChange>>
where
    R: Fn(&CompiledPattern, &str) -> Result<RewriteOutcome>,
    C: Fn(&CompiledPattern, &str) -> Result<usize>,
{
    let Some(before) = read_text_or_skip_binary(path, opts.max_bytes)? else {
        return Ok(None);
    };

    let rewritten = rewrite(pattern, &before)?;
    // A rewrite that matched but reproduced the same text is also a no-op.
    let is_noop = rewritten.matches == 0 || rewritten.after == before;
    if is_noop {
        return Ok(None);
    }
    trace!(path = %path.display(), matches = rewritten.matches, "file would change");

    if !opts.allow_non_convergent {
        match convergence_check(pattern, &rewritten.after)? {
            0 => {}
            extra => {
                return Err(Error::NonConvergent { path: path.to_path_buf(), extra });
            }
        }
    }

    // The diff must be computed before `rewritten.after` is moved into the result.
    let diff = unified_diff(&label_for_path(path), &before, &rewritten.after);
    Ok(Some(FileChange {
        path: path.to_path_buf(),
        matches: rewritten.matches,
        after: rewritten.after,
        diff,
    }))
}
pub(crate) fn read_text_or_skip_binary(path: &Path, max_bytes: u64) -> Result<Option<String>> {
let metadata = fs::metadata(path).io_ctx(path)?;
if metadata.len() > max_bytes {
return Err(Error::FileTooLarge {
path: path.to_path_buf(),
size: metadata.len(),
limit: max_bytes,
});
}
match fs::read_to_string(path) {
Ok(s) => Ok(Some(s)),
Err(e) if e.kind() == std::io::ErrorKind::InvalidData => Ok(None),
Err(e) => Err(Error::Io { path: path.to_path_buf(), source: e }),
}
}
// Unit tests for this module; compiled only for test builds and loaded from
// the file named in the `#[path]` attribute below.
#[cfg(test)]
#[path = "plan_tests.rs"]
mod tests;