use regex::{Captures, Regex};
use std::{fmt::Debug, str::FromStr};
/// Errors that can occur while parsing a substitution rule string.
#[derive(thiserror::Error, Clone, Debug, PartialEq)]
pub(crate) enum SubstitutionError {
/// The rule string had no characters at all, so no delimiter could be read.
#[error("Empty substitution rule")]
Empty,
/// The rule did not contain all of the pattern, replacement and flags sections.
#[error("Invalid substitution rule format")]
InvalidFormat,
/// The flags section contained a character that is not a recognised flag.
#[error("Invalid flag: {0}")]
InvalidFlag(char),
/// The pattern section could not be compiled as a regular expression.
#[error(transparent)]
InvalidPattern(#[from] regex::Error),
}
/// Replacement template of a rule, stored exactly as written in the rule
/// string; escapes and `~` are interpreted when the rule is applied.
#[derive(Clone, Debug)]
struct SubstitutionReplacer(String);
/// A single parsed substitution rule: a compiled pattern, its replacement
/// template, and the flags controlling how and to which entries it applies.
#[derive(Clone, Debug)]
pub(crate) struct SubstitutionRule {
/// Compiled regular expression that is searched for in the name.
pattern: Regex,
/// Template expanded in place of each match.
replacement: SubstitutionReplacer,
/// `g` flag: keep searching after the first match instead of stopping.
global: bool,
/// `p` flag: when this rule matches, the original and resulting names are
/// written to stderr.
print: bool,
/// Whether the rule is applied when the caller says the name is a hard link
/// (`h` enables, `H` disables; enabled by default).
apply_to_hardlinks: bool,
/// Whether the rule is applied when the caller says the name is a symlink
/// (`s` enables, `S` disables; enabled by default).
apply_to_symlinks: bool,
/// Whether the rule is applied to names that are neither symlinks nor hard
/// links (`r` enables, `R` disables; enabled by default).
apply_to_regular_files: bool,
/// `b` flag: if an earlier rule already matched, search the whole
/// intermediate result from its start rather than only the unconsumed tail.
from_begin: bool,
}
impl FromStr for SubstitutionRule {
type Err = SubstitutionError;
#[inline]
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::parse(s)
}
}
impl SubstitutionRule {
    /// Parses a rule of the form `<d>pattern<d>replacement<d>flags`.
    ///
    /// The first character of `rule` is the delimiter `<d>`. It separates the
    /// regular expression, the replacement template and the (possibly empty)
    /// flags section. Everything after the third delimiter belongs to the
    /// flags section.
    ///
    /// Recognised flags:
    ///
    /// * `g` / `G` – replace every match, not only the first one;
    /// * `p` / `P` – print the original and resulting name to stderr on a match;
    /// * `b` / `B` – match against the whole intermediate result when an
    ///   earlier rule has already matched;
    /// * `s` / `S` – apply / do not apply to symlinks;
    /// * `h` / `H` – apply / do not apply to hard links;
    /// * `r` / `R` – apply / do not apply to regular entries.
    ///
    /// All three entry kinds are enabled by default.
    ///
    /// # Errors
    ///
    /// * [`SubstitutionError::Empty`] if `rule` is empty.
    /// * [`SubstitutionError::InvalidFormat`] if fewer than three delimiters
    ///   are present.
    /// * [`SubstitutionError::InvalidFlag`] if the flags section contains an
    ///   unknown character (including a stray delimiter).
    /// * [`SubstitutionError::InvalidPattern`] if the pattern is not a valid
    ///   regular expression.
    pub fn parse(rule: &str) -> Result<Self, SubstitutionError> {
        let mut chars = rule.chars();
        let delimiter = chars.next().ok_or(SubstitutionError::Empty)?;

        // Split into at most three sections so that the flags section keeps
        // *everything* after the third delimiter. An unbounded split would
        // silently drop trailing text such as `/a/b/g/junk`, and would lose
        // the flags entirely when the delimiter happens to be a flag letter.
        let mut sections = chars.as_str().splitn(3, delimiter);
        let pattern = sections.next().ok_or(SubstitutionError::InvalidFormat)?;
        let replacement = sections.next().ok_or(SubstitutionError::InvalidFormat)?;
        let flags = sections.next().ok_or(SubstitutionError::InvalidFormat)?;

        let mut global = false;
        let mut print = false;
        let mut apply_to_hardlinks = true;
        let mut apply_to_symlinks = true;
        let mut apply_to_regular_files = true;
        let mut from_begin = false;
        for flag in flags.chars() {
            match flag {
                'g' | 'G' => global = true,
                'p' | 'P' => print = true,
                'b' | 'B' => from_begin = true,
                's' => apply_to_symlinks = true,
                'S' => apply_to_symlinks = false,
                'h' => apply_to_hardlinks = true,
                'H' => apply_to_hardlinks = false,
                'r' => apply_to_regular_files = true,
                'R' => apply_to_regular_files = false,
                unknown => return Err(SubstitutionError::InvalidFlag(unknown)),
            }
        }

        // Flags are validated before the pattern is compiled, so an invalid
        // flag is reported even when the pattern is also invalid.
        let pattern = Regex::new(pattern)?;
        Ok(Self {
            pattern,
            replacement: SubstitutionReplacer(replacement.into()),
            global,
            print,
            apply_to_hardlinks,
            apply_to_symlinks,
            apply_to_regular_files,
            from_begin,
        })
    }

    /// Returns whether this rule should be applied to an entry of the given
    /// kind.
    ///
    /// An entry that is neither a symlink nor a hard link counts as a regular
    /// entry. If both `is_symlink` and `is_hardlink` are set, both
    /// corresponding flags must be enabled.
    fn applies_to(&self, is_symlink: bool, is_hardlink: bool) -> bool {
        let symlink_ok = !is_symlink || self.apply_to_symlinks;
        let hardlink_ok = !is_hardlink || self.apply_to_hardlinks;
        let regular_ok = is_symlink || is_hardlink || self.apply_to_regular_files;
        symlink_ok && hardlink_ok && regular_ok
    }

    /// Expands the replacement template for one match and appends it to
    /// `result`.
    ///
    /// Template syntax:
    ///
    /// * `~` – the whole matched text;
    /// * `\~` – a literal `~`;
    /// * `\\` – a literal backslash;
    /// * `\1` … `\9` – the text of that capture group, or nothing if the
    ///   group is absent from `caps`;
    /// * `\` followed by any other character – emitted unchanged, backslash
    ///   included (so `\0` stays `\0`);
    /// * a trailing lone `\` – emitted as a literal backslash.
    fn append_replacement(&self, caps: &Captures<'_>, result: &mut String) {
        let template = self.replacement.0.as_str();
        let mut chars = template.chars();
        // `while let` rather than `for`: an escape consumes a second character
        // from the same iterator inside the loop body.
        while let Some(ch) = chars.next() {
            match ch {
                '\\' => match chars.next() {
                    Some('~') => result.push('~'),
                    Some('\\') => result.push('\\'),
                    Some(digit @ '1'..='9') => {
                        // `digit` is an ASCII digit here, so the byte
                        // arithmetic cannot underflow or truncate.
                        let group_index = usize::from(digit as u8 - b'0');
                        if let Some(group) = caps.get(group_index) {
                            result.push_str(group.as_str());
                        }
                    }
                    Some(other) => {
                        result.push('\\');
                        result.push(other);
                    }
                    None => result.push('\\'),
                },
                '~' => result.push_str(&caps[0]),
                other => result.push(other),
            }
        }
    }
}
/// An ordered list of substitution rules that are applied one after another
/// to a name.
#[derive(Clone, Debug)]
pub(crate) struct SubstitutionRules(Vec<SubstitutionRule>);
impl SubstitutionRules {
#[inline]
pub(crate) const fn new(rules: Vec<SubstitutionRule>) -> Self {
Self(rules)
}
#[inline]
pub(crate) fn apply(
&self,
name: impl Into<String>,
is_symlink: bool,
is_hardlink: bool,
) -> String {
apply_substitutions(name, &self.0, is_symlink, is_hardlink)
}
}
/// Applies `substitutions` in order to `name` and returns the rewritten name.
///
/// Rules whose entry-kind flags exclude the given kind are skipped. Matching
/// is positional: each rule searches only the part of the input that earlier
/// matches have not consumed yet, unless the rule has `from_begin` set and an
/// earlier rule has matched, in which case the text built so far (plus the
/// unconsumed tail) becomes the new input and the search restarts at its
/// beginning.
///
/// If no rule matched, the name is returned unchanged. If any matching rule
/// has `print` set, `"{original} >> {result}"` is written to stderr.
fn apply_substitutions(
    name: impl Into<String>,
    substitutions: &[SubstitutionRule],
    is_symlink: bool,
    is_hardlink: bool,
) -> String {
    let original: String = name.into();
    // Text currently being searched, and how far into it we have consumed.
    let mut input = original.clone();
    let mut cursor = 0usize;
    // Text produced so far (everything before `cursor`, after replacement).
    let mut output = String::new();
    let mut matched_any = false;
    let mut should_print = false;

    let applicable = substitutions
        .iter()
        .filter(|rule| rule.applies_to(is_symlink, is_hardlink));

    for rule in applicable {
        if matched_any && rule.from_begin {
            // Finish the pending output and make it the new input.
            output.push_str(&input[cursor..]);
            input = std::mem::take(&mut output);
            cursor = 0;
        }

        loop {
            let tail = &input[cursor..];
            let at_end = tail.is_empty();
            let caps = match rule.pattern.captures(tail) {
                Some(caps) => caps,
                None => break,
            };
            // Group 0 is the overall match and is always present.
            let whole = caps.get(0).unwrap();
            let (start, end) = (whole.start(), whole.end());

            matched_any = true;
            should_print |= rule.print;
            output.push_str(&tail[..start]);
            rule.append_replacement(&caps, &mut output);

            let step = if end > 0 {
                end
            } else if at_end {
                0
            } else {
                // Empty match at the very start of `tail`: copy one character
                // through unchanged so the next search makes progress.
                let width = tail.chars().next().map_or(0, char::len_utf8);
                output.push_str(&tail[..width]);
                width
            };
            cursor += step;

            // A global rule is tried one last time on the empty tail; the
            // end-of-input flag was sampled before this match, hence the
            // check after the replacement has been emitted.
            if at_end || !rule.global {
                break;
            }
        }
    }

    if !matched_any {
        return input;
    }

    output.push_str(&input[cursor..]);
    if should_print {
        eprintln!("{original} >> {output}");
    }
    output
}
// Unit tests for rule parsing and for applying one or more rules to a name.
// The expected strings pin the exact positional-matching behaviour, including
// its handling of empty matches.
#[cfg(test)]
mod tests {
use super::*;
// Builds a `SubstitutionRules` from rule strings; panics if any rule fails to parse.
fn rules(specs: &[&str]) -> SubstitutionRules {
SubstitutionRules::new(
specs
.iter()
.map(|s| SubstitutionRule::parse(s).unwrap())
.collect(),
)
}
// Without `g`, only the first match is replaced.
#[test]
fn single_substitution() {
assert_eq!(
rules(&["/foo/bar/"]).apply("foo baz foo", false, false),
"bar baz foo"
);
}
// With `g`, every match is replaced.
#[test]
fn global_substitution() {
assert_eq!(
rules(&["/foo/bar/g"]).apply("foo baz foo", false, false),
"bar baz bar"
);
}
#[test]
fn parse_from_begin_flag() {
let substitution = SubstitutionRule::parse("/ar/az/b").unwrap();
assert!(substitution.from_begin);
}
// Without `b`, a later rule only searches the text after the previous
// rule's match, so the `bar` produced by the first rule is not rewritten.
#[test]
fn multi_rule_position_tracking() {
let rules = SubstitutionRules::new(vec![
SubstitutionRule::parse("/foo/bar/").unwrap(),
SubstitutionRule::parse("}bar}baz}").unwrap(),
]);
assert_eq!(rules.apply("in/d1/foo", false, false), "in/d1/bar");
assert_eq!(rules.apply("in/d1/bar", false, false), "in/d1/baz");
}
// Two rules that swap names do not undo each other, for the same reason.
#[test]
fn multi_rule_name_swap() {
let rules = SubstitutionRules::new(vec![
SubstitutionRule::parse("/foo/bar/").unwrap(),
SubstitutionRule::parse("}bar}foo}").unwrap(),
]);
assert_eq!(rules.apply("in/d1/foo", false, false), "in/d1/bar");
assert_eq!(rules.apply("in/d1/bar", false, false), "in/d1/foo");
}
// With `b`, the second rule restarts from the beginning of the intermediate
// result and therefore sees the first rule's output.
#[test]
fn multi_rule_with_from_begin_flag() {
let rules = SubstitutionRules::new(vec![
SubstitutionRule::parse("/oo/ar/").unwrap(),
SubstitutionRule::parse("}ar}az}b").unwrap(),
]);
assert_eq!(rules.apply("in/d1/foo", false, false), "in/d1/faz");
assert_eq!(rules.apply("in/d1/bar", false, false), "in/d1/baz");
}
// `b` chains across more than two rules.
#[test]
fn multi_rule_three_with_from_begin_flag() {
let rules = SubstitutionRules::new(vec![
SubstitutionRule::parse("/oo/ar/").unwrap(),
SubstitutionRule::parse("}ar}az}b").unwrap(),
SubstitutionRule::parse(":az:end:b").unwrap(),
]);
assert_eq!(rules.apply("in/d1/foo", false, false), "in/d1/fend");
assert_eq!(rules.apply("in/d1/bar", false, false), "in/d1/bend");
}
// `r` explicitly enables regular entries (which is also the default).
#[test]
fn apply_to_regular_files() {
assert_eq!(
rules(&["/foo/abc/r"]).apply("foo baz foo", false, false),
"abc baz foo"
);
}
// `R` disables the rule for entries that are neither symlink nor hard link.
#[test]
fn skip_regular_files() {
assert_eq!(
rules(&["/foo/abc/R"]).apply("foo baz foo", false, false),
"foo baz foo"
);
}
#[test]
fn apply_to_symlinks() {
assert_eq!(
rules(&["/foo/bar/s"]).apply("foo baz foo", true, false),
"bar baz foo"
);
}
#[test]
fn skip_symlinks() {
assert_eq!(
rules(&["/foo/bar/S"]).apply("foo baz foo", true, false),
"foo baz foo"
);
}
#[test]
fn apply_to_hardlinks() {
assert_eq!(
rules(&["/foo/bar/h"]).apply("foo baz foo", false, true),
"bar baz foo"
);
}
#[test]
fn skip_hardlinks() {
assert_eq!(
rules(&["/foo/bar/H"]).apply("foo baz foo", false, true),
"foo baz foo"
);
}
// `p` only adds a line on stderr; the returned name is unaffected by it.
#[test]
fn print_flag() {
assert_eq!(
rules(&["/foo/bar/p"]).apply("foo baz foo", false, false),
"bar baz foo"
);
}
// `\1` in the replacement inserts the first capture group.
#[test]
fn backreference() {
assert_eq!(
rules(&["/(foo)/\\1bar/g"]).apply("foo baz foo", false, false),
"foobar baz foobar"
);
assert_eq!(
rules(&["/(foo)/\\1bar/"]).apply("foo baz foo", false, false),
"foobar baz foo"
);
}
// `~` in the replacement inserts the whole matched text.
#[test]
fn tilde_replacement() {
assert_eq!(
rules(&["/foo/~bar~/g"]).apply("foo baz foo", false, false),
"foobarfoo baz foobarfoo"
);
}
// A pattern that can match the empty string matches before every character
// it does not consume, and once more on the empty remainder at the end.
#[test]
fn global_zero_length_match() {
let rules = SubstitutionRules::new(vec![SubstitutionRule::parse("/f*/<~>/g").unwrap()]);
assert_eq!(
rules.apply("in/d1/foo", false, false),
"<>i<>n<>/<>d<>1<>/<f><>o<>o<>"
);
}
// With `g`, `$` matches at the end of the name and then again on the empty
// remainder, so the replacement is emitted twice.
#[test]
fn global_dollar_anchor() {
assert_eq!(
rules(&["/$/<END>/g"]).apply("ab", false, false),
"ab<END><END>"
);
}
// The `S` rule is skipped for symlinks; the remaining rule does not match
// `realfile`, so the symlink name is returned unchanged.
#[test]
fn multi_rule_symlink_s_flag() {
let rules = SubstitutionRules::new(vec![
SubstitutionRule::parse("/realfile/foo/S").unwrap(),
SubstitutionRule::parse("/foo/realfile/").unwrap(),
]);
assert_eq!(rules.apply("in/d1/realfile", false, false), "in/d1/foo");
assert_eq!(rules.apply("in/d1/foo", false, false), "in/d1/realfile");
assert_eq!(rules.apply("realfile", true, false), "realfile");
}
// `\~` yields a literal tilde instead of the matched text.
#[test]
fn escaped_tilde_produces_literal_tilde() {
assert_eq!(
rules(&["/foo/\\~/g"]).apply("foo baz foo", false, false),
"~ baz ~"
);
}
// `\\` yields a single literal backslash.
#[test]
fn escaped_backslash() {
assert_eq!(
rules(&["/foo/\\\\/g"]).apply("foo baz foo", false, false),
"\\ baz \\"
);
}
// An escaped backslash followed by a digit is not a back-reference.
#[test]
fn escaped_backslash_before_digit() {
assert_eq!(
rules(&["/(foo)/\\\\1/"]).apply("foo baz", false, false),
"\\1 baz"
);
}
// An escaped backslash followed by `~` is a backslash plus the matched text.
#[test]
fn escaped_backslash_before_tilde() {
assert_eq!(
rules(&["/foo/\\\\~/g"]).apply("foo baz foo", false, false),
"\\foo baz \\foo"
);
}
// `\0` is not a back-reference; it is copied through verbatim.
#[test]
fn backslash_zero_is_literal() {
assert_eq!(
rules(&["/foo/\\0/"]).apply("foo baz", false, false),
"\\0 baz"
);
}
// After a global rule has matched, a following rule (without `b`) only
// searches what is left after the last match.
#[test]
fn global_substitution_consumes_position_for_next_rule() {
let rules = rules(&["/o/z/g", "/bar/baz/"]);
assert_eq!(rules.apply("in/d1/foo", false, false), "in/d1/fzz");
assert_eq!(rules.apply("in/d1/bar", false, false), "in/d1/baz");
}
// After a non-global rule has matched once, the following rule searches
// only the text after that single match.
#[test]
fn singular_substitution_partial_position_for_next_rule() {
let rules = rules(&["/o/z/", "/bar/baz/"]);
assert_eq!(rules.apply("in/d1/foo", false, false), "in/d1/fzo");
assert_eq!(rules.apply("in/d1/bar", false, false), "in/d1/baz");
}
// `sR` restricts a rule to symlinks by disabling regular entries.
#[test]
fn selective_symlink_repointing() {
let rules = rules(&["/realfile/foo/sR"]);
assert_eq!(rules.apply("realfile", true, false), "foo");
assert_eq!(rules.apply("realfile", false, false), "realfile");
}
// `hR` restricts a rule to hard links by disabling regular entries.
#[test]
fn hardlink_only_substitution() {
let rules = rules(&["/target/newtarget/hR"]);
assert_eq!(rules.apply("target", false, true), "newtarget");
assert_eq!(rules.apply("target", false, false), "target");
}
#[test]
fn no_match_returns_original() {
assert_eq!(
rules(&["/xyz/abc/"]).apply("in/d1/foo", false, false),
"in/d1/foo"
);
}
// A non-`/` delimiter allows `/` inside the pattern; an empty replacement
// deletes the match.
#[test]
fn substitution_to_empty_string() {
assert_eq!(
rules(&[",in/d1/foo,,"]).apply("in/d1/foo", false, false),
""
);
}
// A rule skipped because of its entry-kind flags is ignored completely;
// the third rule still only searches the text after the first rule's match.
#[test]
fn skipped_rule_does_not_affect_position() {
let rules = SubstitutionRules::new(vec![
SubstitutionRule::parse("/foo/bar/").unwrap(),
SubstitutionRule::parse("/bar/WRONG/R").unwrap(),
SubstitutionRule::parse("/bar/baz/").unwrap(),
]);
assert_eq!(rules.apply("in/d1/foo", false, false), "in/d1/bar");
}
}