use std::fmt;
/// Top-level syntax tree: a root [`Expr`] paired with the [`Flags`] stored
/// alongside it.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Ast {
    /// Root node of the expression tree.
    pub expr: Expr,
    /// Flag set associated with `expr`.
    pub flags: Flags,
}
/// One node of the regex syntax tree.
///
/// Struct payloads are boxed; several of them hold an `Expr` of their own.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Expr {
    /// The empty expression; renders as nothing.
    Empty,
    /// A single character, escaped as needed when rendered.
    Literal(char),
    /// Sub-expressions in sequence.
    Concat(Vec<Expr>),
    /// Alternatives, rendered separated by `|`.
    Alt(Vec<Expr>),
    /// A quantified sub-expression.
    Repeat(Box<Repeat>),
    /// A capturing, named or non-capturing group.
    Group(Box<Group>),
    /// A bracketed set of character ranges, optionally negated.
    Class(Box<Class>),
    /// An anchor such as `^`, `$` or `\b`.
    Anchor(Anchor),
    /// A lookahead or lookbehind.
    Lookaround(Box<Lookaround>),
    /// A numbered backreference, rendered as `\N`.
    Backref(u32),
    /// The `.` wildcard.
    Dot,
    /// A property class: `\p{name}`, or `\P{name}` when `negated` is set.
    UnicodeProperty {
        /// Property name as written between the braces.
        name: String,
        /// Selects the `\P` form instead of `\p`.
        negated: bool,
    },
    /// A shorthand class such as `\d` or `\W`.
    PerlClass(PerlClassKind),
}
/// The six backslash shorthand classes and their negations.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PerlClassKind {
    /// `\d`
    Digit,
    /// `\D`
    NotDigit,
    /// `\w`
    Word,
    /// `\W`
    NotWord,
    /// `\s`
    Whitespace,
    /// `\S`
    NotWhitespace,
}
impl Expr {
    /// Reports whether this expression is treated as able to match without
    /// consuming any input.
    ///
    /// The answer is derived from the tree shape alone:
    ///
    /// * `Empty`, anchors and lookarounds are always nullable;
    /// * a concatenation is nullable when every part is (so an empty
    ///   concatenation is nullable);
    /// * an alternation is nullable when at least one branch is (so an empty
    ///   alternation is not);
    /// * a repetition is nullable when `min` is zero or its operand is;
    /// * a group is as nullable as its contents;
    /// * everything else is reported as non-nullable.
    pub fn is_nullable(&self) -> bool {
        match self {
            Expr::Empty | Expr::Anchor(_) | Expr::Lookaround(_) => true,

            Expr::Concat(parts) => parts.iter().all(Expr::is_nullable),
            Expr::Alt(branches) => branches.iter().any(Expr::is_nullable),
            Expr::Repeat(rep) => rep.min == 0 || rep.expr.is_nullable(),
            Expr::Group(group) => group.expr.is_nullable(),

            // NOTE(review): `Backref` is reported as non-nullable, but a
            // backreference to a group that captured the empty string can
            // itself match empty -- confirm callers are fine with `false`.
            Expr::Literal(_)
            | Expr::Class(_)
            | Expr::Backref(_)
            | Expr::Dot
            | Expr::UnicodeProperty { .. }
            | Expr::PerlClass(_) => false,
        }
    }
}
/// A quantifier applied to an operand expression.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Repeat {
    /// Operand being repeated.
    pub expr: Expr,
    /// Lower repetition bound.
    pub min: u32,
    /// Upper repetition bound; `None` leaves it open-ended.
    pub max: Option<u32>,
    /// Greedy when `true`; the non-greedy form renders with a trailing `?`.
    pub greedy: bool,
}

impl Repeat {
    /// Builds a repetition from explicit bounds. The bounds are stored as
    /// given; no `min <= max` check is performed.
    pub fn new(expr: Expr, min: u32, max: Option<u32>, greedy: bool) -> Self {
        Repeat { expr, min, max, greedy }
    }

    /// Zero or more repetitions (`*`).
    pub fn star(expr: Expr, greedy: bool) -> Self {
        Repeat { expr, min: 0, max: None, greedy }
    }

    /// One or more repetitions (`+`).
    pub fn plus(expr: Expr, greedy: bool) -> Self {
        Repeat { expr, min: 1, max: None, greedy }
    }

    /// Zero or one repetition (`?`).
    pub fn question(expr: Expr, greedy: bool) -> Self {
        Repeat { expr, min: 0, max: Some(1), greedy }
    }
}
/// A parenthesised sub-expression together with the kind of group it is.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Group {
    /// Contents of the group.
    pub expr: Expr,
    /// Capturing, named-capturing or non-capturing.
    pub kind: GroupKind,
}
/// The flavour of a [`Group`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum GroupKind {
    /// Unnamed capture, rendered as `(...)`. The payload is presumably the
    /// group's index (it is not printed) -- confirm against the parser.
    Capturing(u32),
    /// Named capture, rendered as `(?<name>...)`.
    NamedCapturing {
        /// Name written between the angle brackets.
        name: String,
        /// Numeric index stored with the group; not printed.
        index: u32,
    },
    /// Grouping only, rendered as `(?:...)`.
    NonCapturing,
}
/// A bracketed character class: a list of ranges plus a negation marker.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Class {
    /// Member ranges, kept in the order supplied.
    pub ranges: Vec<ClassRange>,
    /// When set, the class renders with a leading `^`.
    pub negated: bool,
}

impl Class {
    /// Builds a class from the given ranges. The ranges are stored as-is,
    /// with no sorting or merging.
    pub fn new(ranges: Vec<ClassRange>, negated: bool) -> Self {
        Class { ranges, negated }
    }

    /// Builds a non-negated class holding exactly the character `c`.
    pub fn from_char(c: char) -> Self {
        Self::new(vec![ClassRange::single(c)], false)
    }
}
/// An inclusive range of characters, `start..=end`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ClassRange {
    /// First character of the range.
    pub start: char,
    /// Last character of the range (inclusive).
    pub end: char,
}

impl ClassRange {
    /// Builds the range `start..=end`. The endpoints are stored as given;
    /// a reversed range is accepted and simply contains nothing.
    pub fn new(start: char, end: char) -> Self {
        ClassRange { start, end }
    }

    /// Builds the one-character range `c..=c`.
    pub fn single(c: char) -> Self {
        Self::new(c, c)
    }

    /// Returns `true` when `c` lies between `start` and `end`, inclusive.
    pub fn contains(&self, c: char) -> bool {
        (self.start..=self.end).contains(&c)
    }
}
/// Position assertions. The note on each variant is the text it renders as.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Anchor {
    /// `^` (same rendering as `StartOfLine`).
    StartOfString,
    /// `$` (same rendering as `EndOfLine`).
    EndOfString,
    /// `^`
    StartOfLine,
    /// `$`
    EndOfLine,
    /// `\b`
    WordBoundary,
    /// `\B`
    NotWordBoundary,
    /// `\A`
    StartOfInput,
    /// `\z`
    EndOfInput,
    /// `\Z`
    EndOfInputBeforeNewline,
}
/// A lookahead or lookbehind wrapping a sub-expression.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Lookaround {
    /// Expression tested by the assertion.
    pub expr: Expr,
    /// Direction and polarity of the assertion.
    pub kind: LookaroundKind,
}
/// Direction and polarity of a [`Lookaround`], with the prefix each renders as.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum LookaroundKind {
    /// `(?=...)`
    PositiveLookahead,
    /// `(?!...)`
    NegativeLookahead,
    /// `(?<=...)`
    PositiveLookbehind,
    /// `(?<!...)`
    NegativeLookbehind,
}
/// Boolean mode switches carried with an [`Ast`]. Every flag defaults to
/// `false`.
///
/// NOTE(review): the flags are only stored here; how each one affects
/// matching is decided by code outside this section.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct Flags {
    /// The case-insensitive switch.
    pub case_insensitive: bool,
    /// The multi-line switch.
    pub multi_line: bool,
    /// The dot-all switch.
    pub dot_all: bool,
    /// The extended-syntax switch.
    pub extended: bool,
    /// The Unicode switch.
    pub unicode: bool,
}

impl Flags {
    /// Returns a flag set with every switch off; same as `Flags::default()`.
    pub fn new() -> Self {
        Default::default()
    }
}
/// Renders an expression as regex pattern text.
///
/// Output is produced from the tree shape alone: literals go through
/// `escape_char`, anchors through their own `Display` impl, and capture-group
/// indices are not printed. A non-capturing group `(?:...)` is inserted
/// wherever printing a child bare would change how the pattern reads back:
///
/// * an alternation nested directly inside a concatenation;
/// * a repetition whose operand is empty, a concatenation, an alternation or
///   another repetition.
///
/// Inside a bracketed class a literal `-` is written as `\-` so it cannot be
/// taken for a range separator.
impl fmt::Display for Expr {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Expr::Empty => Ok(()),
            Expr::Literal(c) => write!(f, "{}", escape_char(*c)),
            Expr::Concat(exprs) => {
                for expr in exprs {
                    // `|` binds more loosely than juxtaposition: a bare `b|c`
                    // after `a` would read back as `ab` or `c`.
                    if matches!(expr, Expr::Alt(_)) {
                        write!(f, "(?:{})", expr)?;
                    } else {
                        write!(f, "{}", expr)?;
                    }
                }
                Ok(())
            }
            Expr::Alt(exprs) => {
                for (i, expr) in exprs.iter().enumerate() {
                    if i > 0 {
                        write!(f, "|")?;
                    }
                    write!(f, "{}", expr)?;
                }
                Ok(())
            }
            Expr::Repeat(rep) => {
                // A quantifier applies to the single atom before it. Multi-atom
                // operands need a group; so does a nested repetition (`a*`
                // followed by `?` would otherwise read as a lazy star) and an
                // empty operand (which would leave the quantifier dangling).
                let needs_group = matches!(
                    rep.expr,
                    Expr::Empty | Expr::Concat(_) | Expr::Alt(_) | Expr::Repeat(_)
                );
                if needs_group {
                    write!(f, "(?:{})", rep.expr)?;
                } else {
                    write!(f, "{}", rep.expr)?;
                }
                match (rep.min, rep.max) {
                    (0, None) => write!(f, "*")?,
                    (1, None) => write!(f, "+")?,
                    (0, Some(1)) => write!(f, "?")?,
                    (n, None) => write!(f, "{{{},}}", n)?,
                    (n, Some(m)) if n == m => write!(f, "{{{}}}", n)?,
                    (n, Some(m)) => write!(f, "{{{},{}}}", n, m)?,
                }
                // A trailing `?` marks the non-greedy form.
                if !rep.greedy {
                    write!(f, "?")?;
                }
                Ok(())
            }
            Expr::Group(g) => match &g.kind {
                GroupKind::Capturing(_) => write!(f, "({})", g.expr),
                GroupKind::NamedCapturing { name, .. } => write!(f, "(?<{}>{})", name, g.expr),
                GroupKind::NonCapturing => write!(f, "(?:{})", g.expr),
            },
            Expr::Class(cls) => {
                // `escape_char` leaves `-` alone, which is right outside a class
                // but not inside one: `a`, `-`, `z` must not collapse to `a-z`.
                let member = |c: char| -> String {
                    if c == '-' {
                        String::from("\\-")
                    } else {
                        escape_char(c)
                    }
                };
                write!(f, "[")?;
                if cls.negated {
                    write!(f, "^")?;
                }
                for range in &cls.ranges {
                    if range.start == range.end {
                        write!(f, "{}", member(range.start))?;
                    } else {
                        write!(f, "{}-{}", member(range.start), member(range.end))?;
                    }
                }
                write!(f, "]")
            }
            Expr::Anchor(a) => write!(f, "{}", a),
            Expr::Lookaround(la) => {
                let prefix = match la.kind {
                    LookaroundKind::PositiveLookahead => "?=",
                    LookaroundKind::NegativeLookahead => "?!",
                    LookaroundKind::PositiveLookbehind => "?<=",
                    LookaroundKind::NegativeLookbehind => "?<!",
                };
                write!(f, "({}{})", prefix, la.expr)
            }
            Expr::Backref(n) => write!(f, "\\{}", n),
            Expr::Dot => write!(f, "."),
            Expr::UnicodeProperty { name, negated } => {
                if *negated {
                    write!(f, "\\P{{{}}}", name)
                } else {
                    write!(f, "\\p{{{}}}", name)
                }
            }
            Expr::PerlClass(kind) => match kind {
                PerlClassKind::Digit => write!(f, "\\d"),
                PerlClassKind::NotDigit => write!(f, "\\D"),
                PerlClassKind::Word => write!(f, "\\w"),
                PerlClassKind::NotWord => write!(f, "\\W"),
                PerlClassKind::Whitespace => write!(f, "\\s"),
                PerlClassKind::NotWhitespace => write!(f, "\\S"),
            },
        }
    }
}
/// Renders an anchor as its pattern token.
///
/// `StartOfString` and `StartOfLine` both print `^`, and `EndOfString` and
/// `EndOfLine` both print `$`, so the printed form does not tell them apart.
impl fmt::Display for Anchor {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let token = match self {
            Anchor::StartOfString | Anchor::StartOfLine => "^",
            Anchor::EndOfString | Anchor::EndOfLine => "$",
            Anchor::WordBoundary => "\\b",
            Anchor::NotWordBoundary => "\\B",
            Anchor::StartOfInput => "\\A",
            Anchor::EndOfInput => "\\z",
            Anchor::EndOfInputBeforeNewline => "\\Z",
        };
        f.write_str(token)
    }
}
/// Returns the pattern text for a single literal character.
///
/// * Regex metacharacters are prefixed with a backslash.
/// * Newline, carriage return and tab become `\n`, `\r` and `\t`.
/// * A space and any other printable ASCII character is returned unchanged.
/// * Everything else (control characters, non-ASCII) becomes `\u{HEX}` with
///   upper-case hex digits and no zero padding.
fn escape_char(c: char) -> String {
    /// Characters that carry special meaning in a pattern.
    const METACHARS: &str = "\\.*+?()[]{}|^$";

    if METACHARS.contains(c) {
        return format!("\\{}", c);
    }
    match c {
        '\n' => String::from("\\n"),
        '\r' => String::from("\\r"),
        '\t' => String::from("\\t"),
        ' ' => String::from(" "),
        _ if c.is_ascii_graphic() => String::from(c),
        _ => format!("\\u{{{:X}}}", c as u32),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `star`, `plus` and `question` must produce the bounds and greediness
    /// that were asked for.
    #[test]
    fn test_repeat_constructors() {
        let operand = Expr::Literal('a');

        let zero_or_more = Repeat::star(operand.clone(), true);
        assert_eq!((zero_or_more.min, zero_or_more.max), (0, None));
        assert!(zero_or_more.greedy);

        let one_or_more = Repeat::plus(operand.clone(), false);
        assert_eq!((one_or_more.min, one_or_more.max), (1, None));
        assert!(!one_or_more.greedy);

        let optional = Repeat::question(operand, true);
        assert_eq!((optional.min, optional.max), (0, Some(1)));
    }

    /// A range contains characters between its endpoints and rejects ones
    /// outside it.
    #[test]
    fn test_class_range() {
        let lowercase = ClassRange::new('a', 'z');
        assert!(lowercase.contains('m'));
        assert!(!lowercase.contains('A'));
    }

    /// The empty expression and a starred literal are nullable; a bare
    /// literal is not.
    #[test]
    fn test_is_nullable() {
        let bare = Expr::Literal('a');
        let starred = Expr::Repeat(Box::new(Repeat::star(Expr::Literal('a'), true)));

        assert!(Expr::Empty.is_nullable());
        assert!(!bare.is_nullable());
        assert!(starred.is_nullable());
    }
}