use crate::Span;
use bitflags::bitflags;
bitflags! {
    /// Bit set of the flags attached to a [`Regex`], stored in a `u8`.
    ///
    /// Each flag occupies one bit, starting from the least significant one.
    ///
    /// NOTE(review): the single-letter names presumably mirror the
    /// regular-expression flags `g`, `m`, `i`, `s`, `u` and `y` — confirm
    /// against the code that populates them.
    pub struct Flags: u8 {
        const G = 1 << 0;
        const M = 1 << 1;
        const I = 1 << 2;
        const S = 1 << 3;
        const U = 1 << 4;
        const Y = 1 << 5;
    }
}
/// A regular expression: a syntax tree together with its [`Flags`].
#[derive(Debug, Clone)]
pub struct Regex {
    /// The syntax tree of the expression.
    pub node: Node,
    /// The flags attached to the expression.
    pub flags: Flags,
}
/// One node of a regular-expression syntax tree.
///
/// Every variant except [`Node::Empty`] carries the [`Span`] it was created
/// with as its first field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Node {
    /// Carries no span and renders as the empty string.
    Empty,
    /// A list of nodes rendered separated by `|`.
    Disjunction(Span, Vec<Node>),
    /// An assertion; see [`AssertionKind`].
    Assertion(Span, AssertionKind),
    /// A list of nodes rendered back to back with no separator.
    Alternative(Span, Vec<Node>),
    /// A literal. The `String` is what gets rendered; the `char` is
    /// presumably the character it stands for — TODO confirm with the parser.
    Literal(Span, char, String),
    /// A class escape such as `\d`; the `bool` selects the negated
    /// (upper-case) form.
    PerlClass(Span, ClassPerlKind, bool),
    /// A numbered back-reference, rendered as `\` followed by the number.
    BackReference(Span, u32),
    /// Rendered as `.`.
    Dot(Span),
    /// A bracketed character class; see [`CharacterClass`].
    CharacterClass(Span, CharacterClass),
    /// A parenthesised group; see [`Group`].
    Group(Span, Group),
    /// A quantified node: the node, its [`QuantifierKind`], and a `bool` that
    /// marks the quantifier as lazy (rendered with a trailing `?`).
    Quantifier(Span, Box<Node>, QuantifierKind, bool),
    /// A named back-reference, rendered as `\k<name>`.
    NamedBackReference(Span, String),
}
impl Node {
    /// Iterates mutably over the nodes this node expands to.
    ///
    /// A [`Node::Alternative`] yields each of its child nodes in order; every
    /// other variant yields only itself.
    pub fn expanded_nodes(&mut self) -> Box<dyn Iterator<Item = &mut Node> + '_> {
        match self {
            Node::Alternative(_, children) => Box::new(children.iter_mut()),
            _ => Box::new(std::iter::once(self)),
        }
    }

    /// Returns a copy of the span stored in this node.
    ///
    /// [`Node::Empty`] is the only variant without a span, so it is the only
    /// one for which `None` is returned.
    pub fn span(&self) -> Option<Span> {
        let span = match self {
            Node::Empty => return None,
            Node::Dot(span)
            | Node::Disjunction(span, _)
            | Node::Assertion(span, _)
            | Node::Alternative(span, _)
            | Node::BackReference(span, _)
            | Node::CharacterClass(span, _)
            | Node::Group(span, _)
            | Node::NamedBackReference(span, _)
            | Node::Literal(span, _, _)
            | Node::PerlClass(span, _, _)
            | Node::Quantifier(span, _, _, _) => span,
        };
        Some(span.to_owned())
    }
}
/// Renders a [`Node`] back into regular-expression source text.
///
/// `Display` is implemented instead of `ToString` directly; `to_string()`
/// stays available through the standard library's blanket
/// `impl<T: Display> ToString for T`, and the node can now also be used with
/// `{}` in formatting macros.
impl std::fmt::Display for Node {
    /// Writes the textual form of this node (and, recursively, of its
    /// children) to `f`, propagating any error reported by the formatter.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Node::Empty => Ok(()),
            // A sequence: the children are written back to back.
            Node::Alternative(_, nodes) => {
                for node in nodes {
                    write!(f, "{}", node)?;
                }
                Ok(())
            }
            // Alternatives: the children are separated by `|`.
            Node::Disjunction(_, nodes) => {
                for (index, node) in nodes.iter().enumerate() {
                    if index > 0 {
                        f.write_str("|")?;
                    }
                    write!(f, "{}", node)?;
                }
                Ok(())
            }
            Node::Assertion(_, kind) => f.write_str(&kind.to_string()),
            Node::Literal(_, _, string) => f.write_str(string),
            Node::Dot(_) => f.write_str("."),
            Node::NamedBackReference(_, name) => write!(f, "\\k<{}>", name),
            Node::BackReference(_, num) => write!(f, "\\{}", num),
            Node::CharacterClass(_, CharacterClass { members, negated }) => {
                f.write_str(if *negated { "[^" } else { "[" })?;
                for member in members {
                    f.write_str(&member.to_string())?;
                }
                f.write_str("]")
            }
            Node::Quantifier(_, node, kind, lazy) => {
                write!(f, "{}", node)?;
                match kind {
                    QuantifierKind::AtLeastOne => f.write_str("+")?,
                    QuantifierKind::Multiple => f.write_str("*")?,
                    QuantifierKind::Optional => f.write_str("?")?,
                    QuantifierKind::Number(num) => write!(f, "{{{}}}", num)?,
                    // An open upper bound keeps the comma: `{from,}`.
                    QuantifierKind::Between(from, None) => write!(f, "{{{},}}", from)?,
                    QuantifierKind::Between(from, Some(to)) => {
                        write!(f, "{{{},{}}}", from, to)?
                    }
                }
                // A lazy quantifier is marked by a trailing `?`.
                if *lazy {
                    f.write_str("?")?;
                }
                Ok(())
            }
            Node::Group(
                _,
                Group {
                    name,
                    noncapturing,
                    inner,
                },
            ) => {
                f.write_str("(")?;
                if *noncapturing {
                    f.write_str("?:")?;
                } else if let Some(name) = name {
                    // Named capture groups are written `(?<name>...)`; the
                    // `\k<name>` form is reserved for back-references.
                    write!(f, "?<{}>", name)?;
                }
                write!(f, "{})", inner)
            }
            // The `bool` selects the negated (upper-case) escape.
            Node::PerlClass(_, kind, negative) => match kind {
                ClassPerlKind::Digit => f.write_str(if *negative { "\\D" } else { "\\d" }),
                ClassPerlKind::Space => f.write_str(if *negative { "\\S" } else { "\\s" }),
                ClassPerlKind::Word => f.write_str(if *negative { "\\W" } else { "\\w" }),
                ClassPerlKind::Unicode(name, value) => {
                    f.write_str(if *negative { "\\P{" } else { "\\p{" })?;
                    if let Some(name) = name {
                        write!(f, "{}=", name)?;
                    }
                    write!(f, "{}}}", value)
                }
            },
        }
    }
}
/// Renders an [`AssertionKind`] as regular-expression source text.
///
/// `Display` is implemented instead of `ToString` directly; `to_string()`
/// stays available through the standard library's blanket impl.
impl std::fmt::Display for AssertionKind {
    /// Writes `^`, `$`, `\b` or `\B` for the simple assertions, and the
    /// wrapped node enclosed in `(?=`, `(?!`, `(?<=` or `(?<!` … `)` for the
    /// lookaround assertions.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (prefix, inner) = match self {
            AssertionKind::StartOfLine => return f.write_str("^"),
            AssertionKind::EndOfLine => return f.write_str("$"),
            AssertionKind::WordBoundary => return f.write_str(r"\b"),
            AssertionKind::NonWordBoundary => return f.write_str(r"\B"),
            AssertionKind::Lookahead(node) => ("(?=", node),
            AssertionKind::NegativeLookahead(node) => ("(?!", node),
            AssertionKind::Lookbehind(node) => ("(?<=", node),
            AssertionKind::NegativeLookbehind(node) => ("(?<!", node),
        };
        // All lookarounds share the same shape: prefix, inner node, `)`.
        f.write_str(prefix)?;
        f.write_str(&inner.to_string())?;
        f.write_str(")")
    }
}
/// Renders a [`CharacterClassMember`] as regular-expression source text.
///
/// `Display` is implemented instead of `ToString` directly; `to_string()`
/// stays available through the standard library's blanket impl.
impl std::fmt::Display for CharacterClassMember {
    /// Writes a range as its two endpoints joined by `-`, and a single
    /// member as the text of its node.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            CharacterClassMember::Range(start, end) => {
                f.write_str(&start.to_string())?;
                f.write_str("-")?;
                f.write_str(&end.to_string())
            }
            CharacterClassMember::Single(node) => f.write_str(&node.to_string()),
        }
    }
}
/// The payload of a [`Node::Group`]: a parenthesised sub-expression.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Group {
    /// When `true` the group is rendered with the `?:` prefix; `name` is then
    /// ignored by the string rendering.
    pub noncapturing: bool,
    /// The node enclosed by the parentheses.
    pub inner: Box<Node>,
    /// Optional name of the group.
    pub name: Option<String>,
}
/// The repetition attached to a quantified node.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum QuantifierKind {
    /// Rendered as `?`.
    Optional,
    /// Rendered as `*`.
    Multiple,
    /// Rendered as `+`.
    AtLeastOne,
    /// Rendered as `{n}`.
    Number(u32),
    /// Rendered as `{from,to}`, or `{from,}` when the second field is `None`.
    Between(u32, Option<u32>),
}

impl QuantifierKind {
    /// Returns `true` only for [`QuantifierKind::AtLeastOne`].
    pub fn is_at_least_one(&self) -> bool {
        *self == Self::AtLeastOne
    }

    /// Returns `true` only for [`QuantifierKind::Multiple`].
    pub fn is_multiple(&self) -> bool {
        *self == Self::Multiple
    }

    /// Returns `true` only for [`QuantifierKind::Optional`].
    pub fn is_optional(&self) -> bool {
        *self == Self::Optional
    }
}
/// The payload of a [`Node::CharacterClass`]: a bracketed set of members.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CharacterClass {
    /// When `true` the class is rendered with a `^` right after the opening
    /// bracket.
    pub negated: bool,
    /// The members of the class, rendered in order with no separator.
    pub members: Vec<CharacterClassMember>,
}
/// One entry inside a [`CharacterClass`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CharacterClassMember {
    /// A range, rendered as the two nodes joined by `-`.
    Range(Node, Node),
    /// A single node, rendered as that node's own text.
    Single(Node),
}
/// The kind of assertion held by a [`Node::Assertion`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AssertionKind {
    /// Rendered as `^`.
    StartOfLine,
    /// Rendered as `$`.
    EndOfLine,
    /// Rendered as `\b`.
    WordBoundary,
    /// Rendered as `\B`.
    NonWordBoundary,
    /// Rendered as `(?=` node `)`.
    Lookahead(Box<Node>),
    /// Rendered as `(?!` node `)`.
    NegativeLookahead(Box<Node>),
    /// Rendered as `(?<=` node `)`.
    Lookbehind(Box<Node>),
    /// Rendered as `(?<!` node `)`.
    NegativeLookbehind(Box<Node>),
}
/// The kind of class escape held by a [`Node::PerlClass`].
///
/// The negation flag lives on the node, not here; it selects the upper-case
/// form of each escape when rendering.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ClassPerlKind {
    /// Rendered as `\d` (or `\D` when negated).
    Digit,
    /// Rendered as `\w` (or `\W` when negated).
    Word,
    /// Rendered as `\s` (or `\S` when negated).
    Space,
    /// Rendered as `\p{value}`, or `\p{name=value}` when the first field is
    /// `Some(name)`; `\P` is used instead of `\p` when negated.
    Unicode(Option<String>, String),
}