use crate::error::{Error, Result};
use crate::runtime::{Runtime, Value};
use regex::Regex;
/// Reports whether `pat` is free of every regex metacharacter byte, i.e. none
/// of `.` `*` `+` `?` `[` `]` `(` `)` `{` `}` `|` `^` `$` `\` occurs in it.
///
/// An empty pattern contains no metacharacter and is therefore reported as
/// literal.
pub(crate) fn is_literal_pattern(pat: &str) -> bool {
    // All metacharacters are ASCII, so a byte-wise scan is sufficient even
    // for multi-byte UTF-8 input (continuation bytes are never ASCII).
    const METACHARS: &[u8] = b".*+?[](){}|^$\\";
    pat.bytes().all(|byte| !METACHARS.contains(&byte))
}
/// Reports whether a `gsub` call can skip the regex engine: the pattern must
/// be metacharacter-free (see [`is_literal_pattern`]) and the replacement
/// must contain neither `&` nor `\`, so it needs no expansion.
#[inline]
pub(crate) fn gsub_literal_eligible(re_pat: &str, repl: &str) -> bool {
    let repl_is_plain = !repl.chars().any(|ch| matches!(ch, '&' | '\\'));
    repl_is_plain && is_literal_pattern(re_pat)
}
/// Returns `true` when `needle` does not occur anywhere in `hay`.
///
/// Non-empty needles are searched with the finder obtained from
/// `rt.literal_substring_finder(needle)`. An empty needle never touches the
/// finder; since `str::contains("")` is always `true`, the result for an
/// empty needle is always `false`.
fn literal_substring_absent(rt: &mut Runtime, needle: &str, hay: &str) -> bool {
    if !needle.is_empty() {
        let finder = rt.literal_substring_finder(needle);
        return finder.find(hay.as_bytes()).is_none();
    }
    !hay.contains(needle)
}
/// Replaces every non-overlapping occurrence of the literal `needle` in `s`
/// with `repl`, scanning left to right.
///
/// Returns the rewritten string together with the number of replacements.
/// An empty `needle` performs no replacement and yields `(s, 0)`.
fn literal_replace_all(s: &str, needle: &str, repl: &str, rt: &mut Runtime) -> (String, usize) {
    if needle.is_empty() {
        return (s.to_string(), 0);
    }
    let finder = rt.literal_substring_finder(needle);
    let bytes = s.as_bytes();
    let mut result = String::with_capacity(s.len());
    let mut replaced = 0usize;
    // Start of the not-yet-copied tail; also where the next search begins.
    let mut cursor = 0usize;
    while cursor < bytes.len() {
        match finder.find(&bytes[cursor..]) {
            Some(rel) => {
                // `rel` is relative to the searched suffix; make it absolute.
                let hit = cursor + rel;
                result.push_str(&s[cursor..hit]);
                result.push_str(repl);
                replaced += 1;
                cursor = hit + needle.len();
            }
            None => break,
        }
    }
    result.push_str(&s[cursor..]);
    (result, replaced)
}
/// Global substitution: replaces every match of `re_pat` with `repl`.
///
/// * `target = Some(s)` — `s` is rewritten in place (only when at least one
///   match exists).
/// * `target = None` — the current record (`rt.record`) is rewritten and then
///   handed to `rt.set_field_sep_split_owned` together with the value of `FS`
///   (a single space when `FS` is unset).
///
/// When the pattern has no regex metacharacters and the replacement contains
/// neither `&` nor `\`, a literal substring search is used instead of the
/// regex engine. Otherwise `&` / `\` in `repl` are expanded per match by
/// `expand_repl`.
///
/// Returns the number of replacements performed.
///
/// # Errors
///
/// Returns `Error::Runtime` when `rt.ensure_regex` rejects `re_pat`. The
/// pattern is validated before any state is touched, so a failing call leaves
/// both `target` and `rt.record` unchanged.
pub fn gsub(
    rt: &mut Runtime,
    re_pat: &str,
    repl: &str,
    target: Option<&mut String>,
) -> Result<f64> {
    let repl_has_special = repl.contains('&') || repl.contains('\\');
    let use_literal = is_literal_pattern(re_pat) && !repl_has_special;
    // Validate/compile up front: doing this after `mem::take(&mut rt.record)`
    // would drop the record on the error path and leave `rt.record` empty.
    if !use_literal {
        rt.ensure_regex(re_pat).map_err(Error::Runtime)?;
    }
    let n = if let Some(t) = target {
        if use_literal {
            if literal_substring_absent(rt, re_pat, t) {
                0
            } else {
                let (new_s, c) = literal_replace_all(t.as_str(), re_pat, repl, rt);
                *t = new_s;
                c
            }
        } else {
            let re = rt.regex_ref(re_pat);
            // Cheap pre-check so a non-matching target is not reallocated.
            if !re.is_match(t.as_str()) {
                0
            } else {
                let (new_s, c) = replace_all_awk(re, t.as_str(), repl, repl_has_special);
                *t = new_s;
                c
            }
        }
    } else {
        // Move the record out so it can be read while `rt` is borrowed for
        // the finder / regex; every early return below puts it back.
        let cur = std::mem::take(&mut rt.record);
        let (new_s, c) = if use_literal {
            if literal_substring_absent(rt, re_pat, &cur) {
                rt.record = cur;
                return Ok(0.0);
            }
            literal_replace_all(&cur, re_pat, repl, rt)
        } else {
            let re = rt.regex_ref(re_pat);
            if !re.is_match(&cur) {
                rt.record = cur;
                return Ok(0.0);
            }
            replace_all_awk(re, &cur, repl, repl_has_special)
        };
        drop(cur);
        let fs = rt
            .vars
            .get("FS")
            .map(|v| v.as_str())
            .unwrap_or_else(|| " ".into());
        // Install the rewritten record and re-split it with the current FS.
        rt.set_field_sep_split_owned(&fs, new_s);
        c
    };
    Ok(n as f64)
}
pub fn sub_fn(
rt: &mut Runtime,
re_pat: &str,
repl: &str,
target: Option<&mut String>,
) -> Result<f64> {
rt.ensure_regex(re_pat).map_err(Error::Runtime)?;
let repl_has_special = repl.contains('&') || repl.contains('\\');
let n = if let Some(t) = target {
if let Some(m) = rt.regex_ref(re_pat).find(t.as_str()) {
let piece = if repl_has_special {
expand_repl(repl, m.as_str())
} else {
repl.to_string()
};
let mut out = String::with_capacity(t.len() + piece.len());
out.push_str(&t[..m.start()]);
out.push_str(&piece);
out.push_str(&t[m.end()..]);
*t = out;
1.0
} else {
0.0
}
} else {
let cur = std::mem::take(&mut rt.record);
if let Some(m) = rt.regex_ref(re_pat).find(&cur) {
let piece = if repl_has_special {
expand_repl(repl, m.as_str())
} else {
repl.to_string()
};
let mut out = String::with_capacity(cur.len() + piece.len());
out.push_str(&cur[..m.start()]);
out.push_str(&piece);
out.push_str(&cur[m.end()..]);
drop(cur);
let fs = rt
.vars
.get("FS")
.map(|v| v.as_str())
.unwrap_or_else(|| " ".into());
rt.set_field_sep_split_owned(&fs, out);
1.0
} else {
rt.record = cur;
0.0
}
};
Ok(n)
}
/// Awk `match`: finds the leftmost match of `re_pat` in `s` and records it in
/// the `RSTART` / `RLENGTH` variables.
///
/// * On a match, `RSTART` is the 1-based start offset and `RLENGTH` the match
///   length, both taken from the regex crate's `Match` (byte offsets). The
///   same `RSTART` value is returned.
/// * Without a match, `RSTART` is `0`, `RLENGTH` is `-1`, and `0.0` is
///   returned.
///
/// When `arr_name` is given, that array is cleared first. On a match,
/// element `"0"` receives the entire matched text (as gawk does) and elements
/// `"1"`, `"2"`, … receive the text of each capture group; a group that did
/// not participate in the match is stored as an empty string.
///
/// # Errors
///
/// Returns `Error::Runtime` when `rt.ensure_regex` rejects `re_pat`.
pub fn match_fn(rt: &mut Runtime, s: &str, re_pat: &str, arr_name: Option<&str>) -> Result<f64> {
    rt.ensure_regex(re_pat).map_err(Error::Runtime)?;
    // Clone so the regex is not borrowed from `rt` while `rt` is mutated below.
    let re = rt.regex_ref(re_pat).clone();

    let Some(m) = re.find(s) else {
        rt.vars.insert("RSTART".into(), Value::Num(0.0));
        rt.vars.insert("RLENGTH".into(), Value::Num(-1.0));
        if let Some(a) = arr_name {
            rt.array_delete(a, None);
        }
        return Ok(0.0);
    };

    let rstart = (m.start() + 1) as f64;
    let rlength = m.len() as f64;
    rt.vars.insert("RSTART".into(), Value::Num(rstart));
    rt.vars.insert("RLENGTH".into(), Value::Num(rlength));

    if let Some(a) = arr_name {
        rt.array_delete(a, None);
        // gawk stores the whole matched text in the zeroth element.
        rt.array_set(a, "0".to_string(), Value::Str(m.as_str().to_string()));
        if let Some(caps) = re.captures(s) {
            for i in 1..caps.len() {
                let val = caps
                    .get(i)
                    .map(|x| x.as_str().to_string())
                    .unwrap_or_default();
                rt.array_set(a, i.to_string(), Value::Str(val));
            }
        }
    }
    Ok(rstart)
}
/// Replaces every match of `re` in `s` with `repl`.
///
/// When `repl_has_special` is set, the replacement is expanded per match via
/// `expand_repl` (so `&` can stand for the matched text); otherwise `repl` is
/// inserted verbatim. Returns the rewritten string and the number of matches
/// replaced.
fn replace_all_awk(re: &Regex, s: &str, repl: &str, repl_has_special: bool) -> (String, usize) {
    let mut result = String::with_capacity(s.len());
    let mut hits = 0usize;
    // Start of the text that has not been copied into `result` yet.
    let mut tail_start = 0usize;
    for found in re.find_iter(s) {
        hits += 1;
        result.push_str(&s[tail_start..found.start()]);
        match repl_has_special {
            true => result.push_str(&expand_repl(repl, found.as_str())),
            false => result.push_str(repl),
        }
        tail_start = found.end();
    }
    result.push_str(&s[tail_start..]);
    (result, hits)
}
/// Expands an awk `sub`/`gsub` replacement string for one match.
///
/// Rules (POSIX awk):
/// * an unescaped `&` is replaced by `matched`;
/// * `\&` yields a literal `&`;
/// * `\\` yields a single literal `\`;
/// * any other backslash — before some other character or at the very end of
///   `repl` — is kept as a literal `\`, and the character after it is
///   processed normally (so `\q` stays `\q`).
fn expand_repl(repl: &str, matched: &str) -> String {
    let mut out = String::with_capacity(repl.len());
    let mut chars = repl.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            '&' => out.push_str(matched),
            '\\' => match chars.peek().copied() {
                // Only `&` and `\` can be escaped; consume the escaped char.
                Some(escaped @ ('&' | '\\')) => {
                    chars.next();
                    out.push(escaped);
                }
                // Not an escape: the backslash is literal and the following
                // character (if any) is handled by the next loop iteration.
                _ => out.push('\\'),
            },
            other => out.push(other),
        }
    }
    out
}
/// gawk-style `patsplit`: splits `s` into the pieces that match a field
/// pattern and stores them in the array `arr_name` under keys `"1"`, `"2"`, ….
///
/// The pattern is `fieldpat` when given, otherwise the global `FPAT`
/// variable; if the resulting pattern is empty, `[^[:space:]]+` is used.
///
/// When `seps_name` is given, that array is cleared and — for a non-empty
/// `s` with `n` fields — filled with `n + 1` separators, following gawk:
/// `seps[0]` is the text before the first field, `seps[i]` the text after
/// field `i` (so `seps[n]` is whatever trails the last field). Separators may
/// be empty strings. An empty `s` produces no separators.
///
/// Returns the number of fields stored.
///
/// # Errors
///
/// Returns `Error::Runtime` when the field pattern is not a valid regex.
pub fn patsplit(
    rt: &mut Runtime,
    s: &str,
    arr_name: &str,
    fieldpat: Option<&str>,
    seps_name: Option<&str>,
) -> Result<f64> {
    let fp_owned = match fieldpat {
        Some(pat) => pat.to_string(),
        None => rt
            .get_global_var("FPAT")
            .map(|v| v.as_str())
            .unwrap_or_default(),
    };
    let fp = if fp_owned.is_empty() {
        "[^[:space:]]+"
    } else {
        fp_owned.as_str()
    };
    let re = Regex::new(fp).map_err(|e| Error::Runtime(e.to_string()))?;
    let matches: Vec<regex::Match> = re.find_iter(s).collect();
    let n = matches.len();

    rt.array_delete(arr_name, None);
    for (i, m) in matches.iter().enumerate() {
        rt.array_set(
            arr_name,
            (i + 1).to_string(),
            Value::Str(m.as_str().to_string()),
        );
    }

    if let Some(sep_arr) = seps_name {
        rt.array_delete(sep_arr, None);
        if !s.is_empty() {
            // seps[i] is the gap that ends where field i+1 starts; seps[0]
            // therefore covers any leading text before the first field.
            let mut gap_start = 0usize;
            for (i, m) in matches.iter().enumerate() {
                let sep = &s[gap_start..m.start()];
                rt.array_set(sep_arr, i.to_string(), Value::Str(sep.to_string()));
                gap_start = m.end();
            }
            // Trailing separator after the last field (the whole string when
            // nothing matched).
            rt.array_set(
                sep_arr,
                n.to_string(),
                Value::Str(s[gap_start..].to_string()),
            );
        }
    }
    Ok(n as f64)
}
#[cfg(test)]
mod tests {
    use super::{gsub, gsub_literal_eligible, is_literal_pattern, match_fn, patsplit, sub_fn};
    use crate::runtime::{Runtime, Value};

    /// Builds a runtime whose `FS` variable is a single space, as needed by
    /// the record-rewriting paths of `gsub` / `sub_fn`.
    fn rt_with_fs() -> Runtime {
        let mut runtime = Runtime::new();
        runtime.vars.insert("FS".into(), Value::Str(" ".into()));
        runtime
    }

    /// Literal pattern + plain replacement on the record (no target).
    #[test]
    fn gsub_literal_on_record_replaces_and_resplits() {
        let mut runtime = rt_with_fs();
        runtime.record = "foofoo".into();
        let replaced = gsub(&mut runtime, "foo", "bar", None).unwrap();
        assert_eq!(replaced, 2.0);
        assert_eq!(runtime.record, "barbar");
    }

    /// `&` in the replacement expands to the matched text.
    #[test]
    fn gsub_regex_with_amp_replacement() {
        let mut runtime = rt_with_fs();
        runtime.record = "ab".into();
        let replaced = gsub(&mut runtime, "a", "X&Y", None).unwrap();
        assert_eq!(replaced, 1.0);
        assert_eq!(runtime.record, "XaYb");
    }

    /// `sub` stops after the first match.
    #[test]
    fn sub_first_match_only() {
        let mut runtime = rt_with_fs();
        runtime.record = "aaa".into();
        let replaced = sub_fn(&mut runtime, "a", "b", None).unwrap();
        assert_eq!(replaced, 1.0);
        assert_eq!(runtime.record, "baa");
    }

    /// A hit sets `RSTART` (1-based) and `RLENGTH`.
    #[test]
    fn match_sets_rstart_rlength_on_hit() {
        let mut runtime = Runtime::new();
        let start = match_fn(&mut runtime, "foo123bar", "[0-9]+", None).unwrap();
        assert_eq!(start, 4.0);
        assert_eq!(runtime.vars.get("RSTART").unwrap().as_number(), 4.0);
        assert_eq!(runtime.vars.get("RLENGTH").unwrap().as_number(), 3.0);
    }

    /// A miss sets `RSTART` to 0 and `RLENGTH` to -1.
    #[test]
    fn match_sets_rstart_zero_on_miss() {
        let mut runtime = Runtime::new();
        let start = match_fn(&mut runtime, "abc", "[0-9]+", None).unwrap();
        assert_eq!(start, 0.0);
        assert_eq!(runtime.vars.get("RSTART").unwrap().as_number(), 0.0);
        assert_eq!(runtime.vars.get("RLENGTH").unwrap().as_number(), -1.0);
    }

    /// Each match becomes one 1-indexed array element.
    #[test]
    fn patsplit_fills_array() {
        let mut runtime = Runtime::new();
        let fields = patsplit(&mut runtime, "x y z", "parts", Some("[a-z]+"), None).unwrap();
        assert_eq!(fields, 3.0);
        assert_eq!(runtime.array_get("parts", "1").as_str(), "x");
        assert_eq!(runtime.array_get("parts", "2").as_str(), "y");
        assert_eq!(runtime.array_get("parts", "3").as_str(), "z");
    }

    #[test]
    fn is_literal_pattern_accepts_plain_text() {
        assert!(is_literal_pattern("hello"));
    }

    #[test]
    fn is_literal_pattern_rejects_regex_metachar() {
        assert!(!is_literal_pattern("a.c"));
    }

    #[test]
    fn gsub_literal_eligible_rejects_ampersand_in_replacement() {
        assert!(!gsub_literal_eligible("x", "a&b"));
    }

    #[test]
    fn gsub_literal_eligible_accepts_simple_pair() {
        assert!(gsub_literal_eligible("needle", "repl"));
    }
}