use crate::error::{Error, Result};
use crate::variable::{varname_prefix, Value, VariableMap};
use regex::{escape, Regex, RegexBuilder};
use std::fmt::{self, Display, Formatter, Write};
use std::str::FromStr;
/// A parsed pattern: an ordered sequence of parts plus the names of the
/// variables that the pattern itself defines.
pub struct Pattern {
// Parts in the order they were parsed from the pattern text.
parts: Vec<Part>,
// Names of the variables defined by this pattern. The `def` field of
// `Part::DefLit` and `Part::DefVar` is an index into this list.
defs: Vec<String>,
}
/// One element of a parsed pattern.
#[derive(Debug, PartialEq, Eq)]
pub enum Part {
    /// Literal text to match.
    Text(String),
    /// Regular expression source text to match.
    Regex(String),
    /// Reference to a variable by name.
    Var(String),
    /// Definition of a variable from a literal regex. `def` is the index of
    /// the defined variable and `regex` is the regex source text.
    DefLit { def: usize, regex: String },
    /// Definition of a variable from another variable. `def` is the index of
    /// the defined variable and `var` is the name of the referenced one.
    DefVar { def: usize, var: String },
}

impl Part {
    /// Name of the variable this part refers to, if it refers to one.
    ///
    /// Only `Var` and `DefVar` parts reference a variable; every other kind
    /// of part yields `None`.
    pub fn ref_var(&self) -> Option<&str> {
        match self {
            Part::Var(name) => Some(name),
            Part::DefVar { var: name, .. } => Some(name),
            Part::Text(_) | Part::Regex(_) | Part::DefLit { .. } => None,
        }
    }
}
impl Pattern {
fn new() -> Self {
Self {
parts: Vec::new(),
defs: Vec::new(),
}
}
pub fn defines_var(&self, v: &str) -> bool {
self.defs.iter().any(|d| d == v)
}
fn add_def(&mut self, v: &str) -> Result<usize> {
if self.defines_var(v) {
Err(Error::DuplicateDef(format!(
"duplicate definition of ${} in same pattern",
v
)))
} else {
let idx = self.defs.len();
self.defs.push(v.to_string());
Ok(idx)
}
}
fn parse_part(&mut self, s: &str) -> Result<(Part, usize)> {
let dollar = s.find('$');
if dollar != Some(0) {
let end = dollar.unwrap_or(s.len());
return Ok((Part::Text(s[0..end].to_string()), end));
}
if s.starts_with("$$") {
return Ok((Part::Text("$".to_string()), 2));
}
let varname_end = 1 + varname_prefix(&s[1..]);
if varname_end != 1 {
return Ok((Part::Var(s[1..varname_end].to_string()), varname_end));
}
if s.len() < 2 || !s.starts_with("$(") {
return Err(Error::Syntax(
"pattern syntax error, use $$ to match a single $".to_string(),
));
}
let varname_end = 2 + varname_prefix(&s[2..]);
let varname = s[2..varname_end].to_string();
match s[varname_end..].chars().next() {
None => {
return Err(Error::Syntax(format!("unterminated $({}...", varname)));
}
Some(')') => {
let part = if varname.is_empty() {
Part::Text(varname)
} else {
Part::Var(varname)
};
return Ok((part, varname_end + 1));
}
Some('=') => {
}
Some(ch) => {
return Err(Error::Syntax(format!(
"syntax error in $({}... '{}'",
varname, ch
)));
}
}
let def = if varname.is_empty() {
None
} else {
Some(self.add_def(&varname)?)
};
if s[varname_end + 1..].starts_with('$') {
let refname_begin = varname_end + 2;
let refname_end = refname_begin + varname_prefix(&s[refname_begin..]);
if refname_begin == refname_end {
return Err(Error::Syntax(format!(
"expected variable name in $({}=$...",
varname
)));
}
if !s[refname_end..].starts_with(')') {
return Err(Error::Syntax(format!(
"expected ')' after $({}=${}...",
varname,
&s[refname_begin..refname_end]
)));
}
let refname = s[refname_begin..refname_end].to_string();
return if let Some(defidx) = def {
Ok((
Part::DefVar {
def: defidx,
var: refname,
},
refname_end + 1,
))
} else {
Err(Error::Syntax(format!(
"expected variable name in $(=${})",
refname
)))
};
}
let rx_begin = varname_end + 1;
let rx_end = rx_begin + regex_prefix(&s[rx_begin..]);
if s[rx_end..].starts_with(')') {
let part = if let Some(defidx) = def {
Part::DefLit {
def: defidx,
regex: format!("(?P<{}>{})", varname, &s[rx_begin..rx_end]),
}
} else {
Part::Regex(format!("(?:{})", &s[rx_begin..rx_end]))
};
Ok((part, rx_end + 1))
} else {
Err(Error::Syntax(format!(
"missing ')' after regex in $({}={}",
varname,
&s[rx_begin..rx_end]
)))
}
}
}
fn regex_prefix(s: &str) -> usize {
let mut escape = false;
enum State {
Normal, Curly, CSFirst, CSNeg, CSBody, }
let mut state = State::Normal;
let mut nest = 0usize;
for (idx, ch) in s.char_indices() {
if escape {
escape = false;
continue;
} else if ch == '\\' {
escape = true;
continue;
}
match state {
State::Normal => match ch {
'[' => state = State::CSFirst,
'{' => state = State::Curly,
'(' => nest += 1,
')' if nest > 0 => nest -= 1,
')' | '}' => return idx,
_ => {}
},
State::Curly => {
if ch == '}' {
state = State::Normal;
}
}
State::CSFirst => {
state = match ch {
'^' => State::CSNeg,
_ => State::CSBody,
}
}
State::CSNeg => state = State::CSBody,
State::CSBody => {
if ch == ']' {
state = State::Normal;
}
}
}
}
s.len()
}
impl FromStr for Pattern {
    type Err = Error;

    /// Parse a pattern from its textual form.
    ///
    /// Leading and trailing whitespace is trimmed, then parts are parsed one
    /// after another until the text is exhausted. Errors from parsing a part
    /// are propagated. A part that references a variable defined by this same
    /// pattern is rejected with `Error::Backref`.
    fn from_str(s: &str) -> Result<Pattern> {
        let mut pat = Pattern::new();
        let mut rest = s.trim();
        while !rest.is_empty() {
            let (part, consumed) = pat.parse_part(rest)?;
            match part.ref_var() {
                Some(v) if pat.defines_var(v) => {
                    return Err(Error::Backref(format!(
                        "unsupported back-reference to '${}' \
                         defined in same pattern",
                        v
                    )));
                }
                _ => {}
            }
            pat.parts.push(part);
            rest = &rest[consumed..];
        }
        Ok(pat)
    }
}
impl Pattern {
pub fn parts(&self) -> &[Part] {
&self.parts
}
pub fn defs(&self) -> &[String] {
&self.defs
}
pub fn resolve(&self, vmap: &dyn VariableMap) -> Result<Regex> {
let mut out = String::new();
if let Some(&Part::Text(ref s)) = self.parts.first() {
if s.starts_with(char::is_alphanumeric) {
out.push_str(r"\b");
}
}
for part in &self.parts {
match *part {
Part::Text(ref s) => {
out.push_str(&escape(s));
}
Part::Regex(ref rx) => out.push_str(rx),
Part::Var(ref var) => {
match vmap.lookup(var) {
None => {
return Err(Error::UndefVariable(format!(
"undefined variable ${}",
var
)));
}
Some(Value::Text(s)) => out.push_str(&escape(&s)),
Some(Value::Regex(rx)) => write!(out, "(?:{})", rx).unwrap(),
}
}
Part::DefLit { ref regex, .. } => out.push_str(regex),
Part::DefVar { def, ref var } => {
write!(out, "(?P<{}>", self.defs[def]).unwrap();
match vmap.lookup(var) {
None => {
return Err(Error::UndefVariable(format!(
"undefined variable ${}",
var
)));
}
Some(Value::Text(s)) => write!(out, "{})", escape(&s[..])).unwrap(),
Some(Value::Regex(rx)) => write!(out, "{})", rx).unwrap(),
}
}
}
}
if let Some(&Part::Text(ref s)) = self.parts.last() {
if s.ends_with(char::is_alphanumeric) {
out.push_str(r"\b");
}
}
Ok(RegexBuilder::new(&out).multi_line(true).build()?)
}
}
impl Display for Pattern {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
for part in &self.parts {
use self::Part::*;
match *part {
Text(ref txt) if txt == "" => write!(f, "$()"),
Text(ref txt) if txt == "$" => write!(f, "$$"),
Text(ref txt) => write!(f, "{}", txt),
Regex(ref rx) => write!(f, "$(={})", rx),
Var(ref var) => write!(f, "$({})", var),
DefLit { def, ref regex } => {
let defvar = &self.defs[def];
let litrx = ®ex[5 + defvar.len()..regex.len() - 1];
write!(f, "$({}={})", defvar, litrx)
}
DefVar { def, ref var } => write!(f, "$({}=${})", self.defs[def], var),
}?;
}
Ok(())
}
}
// Unit tests for the regex prefix scanner, the single-part parser and the
// full pattern parser.
#[cfg(test)]
mod tests {
// `regex_prefix` must stop at the first closer that is not nested in a
// group and not inside a character class.
#[test]
fn regex() {
use super::regex_prefix;
assert_eq!(regex_prefix(""), 0);
assert_eq!(regex_prefix(")"), 0);
assert_eq!(regex_prefix(")c"), 0);
assert_eq!(regex_prefix("x"), 1);
assert_eq!(regex_prefix("x)x"), 1);
assert_eq!(regex_prefix("x(c))x"), 4);
assert_eq!(regex_prefix("()x(c))x"), 6);
assert_eq!(regex_prefix("()x(c)"), 6);
assert_eq!(regex_prefix("x([)]))x"), 6);
assert_eq!(regex_prefix("x[)])x"), 4);
assert_eq!(regex_prefix("x[^)])x"), 5);
// The class is never closed here, so the whole input is consumed.
assert_eq!(regex_prefix("x[^])x"), 6);
}
// `parse_part` on text, `$$`, variable references and malformed input.
// All calls share one `Pattern`, and a definition is registered even when
// the part later fails to parse, so each failing `$(name=...` case uses a
// fresh name.
#[test]
fn part() {
use super::{Part, Pattern};
let mut pat = Pattern::new();
assert_eq!(pat.parse_part("").unwrap(), (Part::Text("".to_string()), 0));
assert_eq!(
pat.parse_part("x").unwrap(),
(Part::Text("x".to_string()), 1)
);
assert_eq!(
pat.parse_part("x2").unwrap(),
(Part::Text("x2".to_string()), 2,)
);
// Text parts stop right before the next '$'.
assert_eq!(
pat.parse_part("x$").unwrap(),
(Part::Text("x".to_string()), 1,)
);
assert_eq!(
pat.parse_part("x$$").unwrap(),
(Part::Text("x".to_string()), 1,)
);
assert_eq!(
pat.parse_part("$").unwrap_err().to_string(),
"pattern syntax error, use $$ to match a single $"
);
assert_eq!(
pat.parse_part("$$").unwrap(),
(Part::Text("$".to_string()), 2,)
);
assert_eq!(
pat.parse_part("$$ ").unwrap(),
(Part::Text("$".to_string()), 2,)
);
assert_eq!(
pat.parse_part("$0").unwrap(),
(Part::Var("0".to_string()), 2)
);
assert_eq!(
pat.parse_part("$xx=").unwrap(),
(Part::Var("xx".to_string()), 3,)
);
assert_eq!(
pat.parse_part("$xx$").unwrap(),
(Part::Var("xx".to_string()), 3,)
);
assert_eq!(
pat.parse_part("$(0)").unwrap(),
(Part::Var("0".to_string()), 4,)
);
// `$()` is an empty text part.
assert_eq!(
pat.parse_part("$()").unwrap(),
(Part::Text("".to_string()), 3,)
);
assert_eq!(
pat.parse_part("$(0").unwrap_err().to_string(),
("unterminated $(0...")
);
assert_eq!(
pat.parse_part("$(foo:").unwrap_err().to_string(),
("syntax error in $(foo... ':'")
);
assert_eq!(
pat.parse_part("$(foo =").unwrap_err().to_string(),
("syntax error in $(foo... ' '")
);
assert_eq!(
pat.parse_part("$(eo0=$bar").unwrap_err().to_string(),
("expected ')' after $(eo0=$bar...")
);
assert_eq!(
pat.parse_part("$(eo1=$bar}").unwrap_err().to_string(),
("expected ')' after $(eo1=$bar...")
);
assert_eq!(
pat.parse_part("$(eo2=$)").unwrap_err().to_string(),
("expected variable name in $(eo2=$...")
);
assert_eq!(
pat.parse_part("$(eo3=$-)").unwrap_err().to_string(),
("expected variable name in $(eo3=$...")
);
}
// `parse_part` on variable definitions. The expected `def` indices count up
// because every successful definition is appended to the same `Pattern`.
#[test]
fn partdefs() {
use super::{Part, Pattern};
let mut pat = Pattern::new();
assert_eq!(
pat.parse_part("$(foo=$bar)").unwrap(),
(
Part::DefVar {
def: 0,
var: "bar".to_string(),
},
11,
)
);
// Defining the same variable a second time is rejected.
assert_eq!(
pat.parse_part("$(foo=$bar)").unwrap_err().to_string(),
"duplicate definition of $foo in same pattern"
);
assert_eq!(
pat.parse_part("$(fxo=$bar)x").unwrap(),
(
Part::DefVar {
def: 1,
var: "bar".to_string(),
},
11,
)
);
assert_eq!(
pat.parse_part("$(fo2=[a-z])").unwrap(),
(
Part::DefLit {
def: 2,
regex: "(?P<fo2>[a-z])".to_string(),
},
12,
)
);
// A ')' inside a character class does not end the regex.
assert_eq!(
pat.parse_part("$(fo3=[a-)])").unwrap(),
(
Part::DefLit {
def: 3,
regex: "(?P<fo3>[a-)])".to_string(),
},
12,
)
);
assert_eq!(
pat.parse_part("$(fo4=)").unwrap(),
(
Part::DefLit {
def: 4,
regex: "(?P<fo4>)".to_string(),
},
7,
)
);
// Without a name, the regex becomes an anonymous non-capturing group.
assert_eq!(
pat.parse_part("$(=.*)").unwrap(),
(Part::Regex("(?:.*)".to_string(),), 6,)
);
assert_eq!(
pat.parse_part("$(=)").unwrap(),
(Part::Regex("(?:)".to_string(),), 4,)
);
assert_eq!(
pat.parse_part("$()").unwrap(),
(Part::Text("".to_string()), 3,)
);
}
// Whole-pattern parsing through `FromStr`: surrounding whitespace is trimmed
// and the input is split into consecutive parts.
#[test]
fn pattern() {
use super::Pattern;
let p: Pattern = " Hello world! ".parse().unwrap();
assert_eq!(format!("{:?}", p.parts), "[Text(\"Hello world!\")]");
let p: Pattern = " $foo=$(bar) ".parse().unwrap();
assert_eq!(
format!("{:?}", p.parts),
"[Var(\"foo\"), Text(\"=\"), Var(\"bar\")]"
);
}
}