/// Kinds of predefined character class.
///
/// `ClassItem::matches` tests each kind through the corresponding helper in
/// `crate::unicode` (`is_digit`, `is_word`, `is_space`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Predef {
/// Tested with `crate::unicode::is_digit`.
Digit,
/// Tested with `crate::unicode::is_word`.
Word,
/// Tested with `crate::unicode::is_space`.
Space,
}
/// A named character property bundled with its predicate and a negation flag.
#[derive(Clone, Debug)]
pub struct Property {
/// Name of the property; `Node::dump` prints it.
pub name: String,
/// Negation flag; `Node::dump` prints it as `neg=`.
/// NOTE(review): presumably inverts the result of `pred`, the way
/// `ClassItem::Prop` does — the code that evaluates `Node::Prop` is not in view.
pub negated: bool,
/// Predicate associated with the property (type declared in `crate::unicode`).
pub pred: crate::unicode::PropFn,
}
/// One member of a `CharClass`.
///
/// Use `ClassItem::matches` to test a character against an item.
#[derive(Clone, Debug)]
pub enum ClassItem {
/// An explicit character set; membership is decided by `CharSet::contains`.
Set(crate::charset::CharSet),
/// A predefined class (`Predef`).
Predef {
/// Which predefined class to test.
kind: Predef,
/// When `true`, the result of the test is inverted.
negated: bool,
/// Passed unchanged as the second argument of the `crate::unicode` helper.
/// NOTE(review): presumably restricts the test to ASCII — confirm in `crate::unicode`.
ascii: bool,
},
/// A property test driven by a predicate.
Prop {
/// Predicate called with the candidate character.
pred: crate::unicode::PropFn,
/// When `true`, the result of the predicate is inverted.
negated: bool,
},
}
impl ClassItem {
    /// Reports whether the character `c` satisfies this class item.
    ///
    /// * `Set` — membership is delegated to the set's `contains`.
    /// * `Prop` — the predicate is called with `c`; the answer is flipped
    ///   when `negated` is set.
    /// * `Predef` — the `crate::unicode` helper for the kind is called with
    ///   `c` and the `ascii` flag; the answer is flipped when `negated` is set.
    pub fn matches(&self, c: char) -> bool {
        match self {
            Self::Set(members) => members.contains(c),
            Self::Prop { pred, negated } => pred(c) != *negated,
            Self::Predef {
                kind,
                negated,
                ascii,
            } => {
                let ascii_flag = *ascii;
                let hit = match kind {
                    Predef::Space => crate::unicode::is_space(c, ascii_flag),
                    Predef::Word => crate::unicode::is_word(c, ascii_flag),
                    Predef::Digit => crate::unicode::is_digit(c, ascii_flag),
                };
                // Comparing against the flag inverts the hit exactly when
                // `negated` is true.
                hit != *negated
            }
        }
    }
}
/// A character class: a list of items plus an overall negation flag.
///
/// The derived `Default` yields an empty, non-negated class.
#[derive(Clone, Debug, Default)]
pub struct CharClass {
/// Members of the class; `CharClass::matches` checks whether any of them
/// matches the character.
pub items: Vec<ClassItem>,
/// When `true`, `CharClass::matches` inverts the "any item matches" result.
pub negated: bool,
}
impl CharClass {
    /// Creates an empty, non-negated class.
    pub fn new() -> Self {
        // The derived `Default` produces an empty `items` vector and
        // `negated == false`.
        Self::default()
    }

    /// Reports whether `c` belongs to the class.
    ///
    /// The character is a hit when at least one item matches it; the hit is
    /// inverted when the class itself is negated.
    pub fn matches(&self, c: char) -> bool {
        let hit = self.items.iter().any(|item| item.matches(c));
        if self.negated {
            !hit
        } else {
            hit
        }
    }

    /// Calls `add_case_variants` on every `ClassItem::Set` item, in order.
    ///
    /// `Predef` and `Prop` items are left untouched.
    pub fn add_case_variants(&mut self) {
        let sets = self.items.iter_mut().filter_map(|item| match item {
            ClassItem::Set(set) => Some(set),
            ClassItem::Predef { .. } | ClassItem::Prop { .. } => None,
        });
        for set in sets {
            set.add_case_variants();
        }
    }
}
/// Syntax-tree node.
///
/// NOTE(review): the variants look like a regular-expression AST. The code
/// that interprets the tree is not in view, so the variant notes below are
/// inferred from names unless they cite `Node::seq`, `Node::quantified` or
/// `Node::dump`.
#[derive(Clone, Debug)]
pub enum Node {
/// Node with no content. `Node::seq` drops these from its input and returns
/// one when nothing is left.
Empty,
/// A single character. NOTE(review): `ign` presumably means "ignore case" — confirm.
Lit {
ch: char,
ign: bool,
},
/// A run of characters, with the same `ign` flag as `Lit`.
LitStr {
chars: Vec<char>,
ign: bool,
},
/// NOTE(review): presumably the "any character" atom, with `dotall`
/// controlling whether line terminators are included — confirm.
Any {
dotall: bool,
},
/// A character class.
Class {
cc: CharClass,
},
/// A predefined class; carries the same fields as `ClassItem::Predef`.
Predef {
kind: Predef,
negated: bool,
ascii: bool,
},
/// A named property test.
Prop(Property),
/// NOTE(review): presumably a start-of-line/-text assertion whose behavior
/// depends on `multiline` — confirm.
StartLine {
multiline: bool,
},
/// NOTE(review): presumably an end-of-line/-text assertion whose behavior
/// depends on `multiline` — confirm.
EndLine {
multiline: bool,
},
/// NOTE(review): presumably an assertion for the start of the input — confirm.
StartText,
/// NOTE(review): presumably an assertion for the end of the input — confirm.
EndText,
/// NOTE(review): presumably a word-boundary assertion (or its negation) — confirm.
WordBoundary {
negated: bool,
ascii: bool,
},
/// NOTE(review): presumably a start-of-word (`end == false`) or end-of-word
/// (`end == true`) assertion — confirm.
WordEdge {
end: bool,
ascii: bool,
},
/// NOTE(review): presumably matches one grapheme cluster — confirm.
Grapheme,
/// A numbered group wrapping a sub-node.
/// NOTE(review): presumably a capturing group — confirm.
Group {
index: usize,
node: Box<Node>,
},
/// Wraps a sub-node. NOTE(review): presumably a non-capturing group — confirm.
NonCap(Box<Node>),
/// Wraps a sub-node. `Node::quantified` wraps a repeat in this variant when
/// `possessive` is requested.
Atomic(Box<Node>),
/// A list of alternatives.
Branch {
alts: Vec<Node>,
},
/// An ordered list of nodes. `Node::seq` produces this variant only when two
/// or more nodes remain after flattening.
Sequence {
items: Vec<Node>,
},
/// Repetition of a sub-node, as built by `Node::quantified`.
/// NOTE(review): `max == None` presumably means "no upper bound" — confirm.
Repeat {
node: Box<Node>,
min: usize,
max: Option<usize>,
greedy: bool,
},
/// Reference to a group by number, with the same `ign` flag as `Lit`.
BackRef {
group: usize,
ign: bool,
},
/// NOTE(review): presumably a lookaround assertion — `behind` selecting
/// lookbehind vs. lookahead and `positive` selecting positive vs. negative
/// — confirm.
Look {
behind: bool,
positive: bool,
node: Box<Node>,
},
}
impl Node {
    /// Builds a node from a list of nodes.
    ///
    /// `Node::Empty` entries are dropped, and the children of any directly
    /// contained `Node::Sequence` are spliced in place (one level only).
    /// Depending on how many nodes remain, the result is:
    ///
    /// * none — `Node::Empty`;
    /// * one — that node itself;
    /// * several — a `Node::Sequence` holding them in order.
    pub fn seq(items: Vec<Node>) -> Node {
        let mut flat: Vec<Node> = Vec::with_capacity(items.len());
        for item in items {
            match item {
                Node::Empty => {}
                Node::Sequence { items: nested } => flat.extend(nested),
                single => flat.push(single),
            }
        }
        if flat.len() > 1 {
            return Node::Sequence { items: flat };
        }
        // Zero or one node left: hand back the lone node, or `Empty`.
        flat.pop().unwrap_or(Node::Empty)
    }

    /// Wraps `self` in a `Node::Repeat` with the given bounds and greediness.
    ///
    /// When `possessive` is `true` the repeat is additionally wrapped in
    /// `Node::Atomic`.
    pub fn quantified(
        self,
        min: usize,
        max: Option<usize>,
        greedy: bool,
        possessive: bool,
    ) -> Node {
        let inner = Box::new(self);
        let repeat = Node::Repeat {
            node: inner,
            min,
            max,
            greedy,
        };
        if !possessive {
            return repeat;
        }
        Node::Atomic(Box::new(repeat))
    }

    /// Writes a textual rendering of the tree into `f`, one node per line.
    ///
    /// Each line starts with `indent` copies of the padding string, followed
    /// by the variant name and its scalar fields. Child nodes follow their
    /// parent at `indent + 1`. The first write error stops the dump and is
    /// returned.
    pub fn dump(&self, f: &mut impl std::fmt::Write, indent: usize) -> std::fmt::Result {
        let pad = " ".repeat(indent);
        let child_indent = indent + 1;

        // Step 1: the line describing this node.
        let header = match self {
            Node::Empty => writeln!(f, "{pad}Empty"),
            Node::Lit { ch, ign } => writeln!(f, "{pad}Lit {ch:?} ign={ign}"),
            Node::LitStr { chars, ign } => writeln!(f, "{pad}LitStr {:?} ign={ign}", chars),
            Node::Any { dotall } => writeln!(f, "{pad}Any dotall={dotall}"),
            Node::Class { cc } => writeln!(
                f,
                "{pad}Class negated={} ({} items)",
                cc.negated,
                cc.items.len()
            ),
            Node::Predef {
                kind,
                negated,
                ascii,
            } => writeln!(f, "{pad}Predef {kind:?} neg={negated} ascii={ascii}"),
            Node::Prop(p) => writeln!(f, "{pad}Prop {} neg={}", p.name, p.negated),
            Node::StartLine { multiline } => writeln!(f, "{pad}StartLine ml={multiline}"),
            Node::EndLine { multiline } => writeln!(f, "{pad}EndLine ml={multiline}"),
            Node::StartText => writeln!(f, "{pad}StartText"),
            Node::EndText => writeln!(f, "{pad}EndText"),
            Node::WordBoundary { negated, ascii } => {
                writeln!(f, "{pad}WordBoundary neg={negated} ascii={ascii}")
            }
            Node::WordEdge { end, ascii } => {
                writeln!(f, "{pad}WordEdge end={end} ascii={ascii}")
            }
            Node::Grapheme => writeln!(f, "{pad}Grapheme"),
            Node::Group { index, .. } => writeln!(f, "{pad}Group {index}"),
            Node::NonCap(_) => writeln!(f, "{pad}NonCap"),
            Node::Atomic(_) => writeln!(f, "{pad}Atomic"),
            Node::Branch { .. } => writeln!(f, "{pad}Branch"),
            Node::Sequence { .. } => writeln!(f, "{pad}Sequence"),
            Node::Repeat {
                min, max, greedy, ..
            } => writeln!(f, "{pad}Repeat min={min} max={max:?} greedy={greedy}"),
            Node::BackRef { group, ign } => writeln!(f, "{pad}BackRef {group} ign={ign}"),
            Node::Look {
                behind, positive, ..
            } => writeln!(f, "{pad}Look behind={behind} positive={positive}"),
        };
        header?;

        // Step 2: any children, one level deeper.
        match self {
            Node::Group { node, .. }
            | Node::Repeat { node, .. }
            | Node::Look { node, .. }
            | Node::NonCap(node)
            | Node::Atomic(node) => node.dump(f, child_indent),
            Node::Branch { alts: children } | Node::Sequence { items: children } => children
                .iter()
                .try_for_each(|child| child.dump(f, child_indent)),
            Node::Empty
            | Node::Lit { .. }
            | Node::LitStr { .. }
            | Node::Any { .. }
            | Node::Class { .. }
            | Node::Predef { .. }
            | Node::Prop(_)
            | Node::StartLine { .. }
            | Node::EndLine { .. }
            | Node::StartText
            | Node::EndText
            | Node::WordBoundary { .. }
            | Node::WordEdge { .. }
            | Node::Grapheme
            | Node::BackRef { .. } => Ok(()),
        }
    }
}