#![forbid(unsafe_code)]
use crate::escape_ascii;
#[derive(Clone, Debug, PartialOrd, PartialEq)]
pub enum Node {
Final(FinalNode),
NonFinal(NonFinalNode),
}
impl Node {
#[allow(clippy::must_use_candidate)]
#[allow(clippy::match_wildcard_for_single_variants)]
#[allow(clippy::missing_panics_doc)]
pub fn unwrap_final(self) -> FinalNode {
match self {
Node::Final(node) => node,
other => panic!("unwrap_final() called on value: {:?}", other),
}
}
#[allow(clippy::must_use_candidate)]
#[allow(clippy::match_wildcard_for_single_variants)]
#[allow(clippy::missing_panics_doc)]
pub fn unwrap_non_final(self) -> NonFinalNode {
match self {
Node::NonFinal(node) => node,
other => panic!("unwrap_non_final() called on value: {:?}", other),
}
}
}
#[derive(Copy, Clone, PartialOrd, PartialEq)]
pub enum ClassItem {
Byte(u8),
ByteRange(u8, u8),
}
impl core::fmt::Debug for ClassItem {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> {
match self {
ClassItem::Byte(b) => write!(f, "Byte({})", escape_ascii([*b])),
ClassItem::ByteRange(a, b) => {
write!(
f,
"ByteRange({}-{})",
escape_ascii([*a]),
escape_ascii([*b])
)
}
}
}
}
#[derive(Clone, PartialOrd, PartialEq)]
pub enum NonFinalNode {
Escape,
HexEscape0,
HexEscape1(u8),
OpenClass0,
OpenClassNeg,
OpenClass( bool, Vec<ClassItem>),
OpenByteRange(u8),
ByteRange(u8, u8),
OpenGroup,
OpenExtendedGroup,
OpenNonCapturingGroup,
OpenAlt(Vec<FinalNode>),
RepeatMin(String),
RepeatMax(String, String),
RepeatToken(String, usize, Option<usize>),
}
impl core::fmt::Debug for NonFinalNode {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> {
match self {
NonFinalNode::Escape => write!(f, "Escape"),
NonFinalNode::HexEscape0 => write!(f, "HexEscape0"),
NonFinalNode::HexEscape1(b) => write!(f, "HexEscape1({})", escape_ascii([*b])),
NonFinalNode::OpenClass0 => write!(f, "OpenClass0"),
NonFinalNode::OpenClassNeg => write!(f, "OpenClassNeg"),
NonFinalNode::OpenClass(true, items) => {
write!(f, "OpenClass{:?}", items)
}
NonFinalNode::OpenClass(false, items) => {
write!(f, "OpenClass^{:?}", items)
}
NonFinalNode::OpenByteRange(b) => write!(f, "OpenByteRange({})", escape_ascii([*b])),
NonFinalNode::ByteRange(a, b) => write!(
f,
"ByteRange({}-{})",
escape_ascii([*a]),
escape_ascii([*b])
),
NonFinalNode::OpenGroup => write!(f, "OpenGroup"),
NonFinalNode::OpenExtendedGroup => write!(f, "OpenExtendedGroup"),
NonFinalNode::OpenNonCapturingGroup => write!(f, "OpenNonCapturingGroup"),
NonFinalNode::OpenAlt(nodes) => write!(f, "OpenAlt{:?}", nodes),
NonFinalNode::RepeatMin(min) => write!(f, "RepeatMin({})", min),
NonFinalNode::RepeatMax(min, max) => write!(f, "RepeatMax({},{})", min, max),
NonFinalNode::RepeatToken(printable, min, opt_max) => {
write!(f, "RepeatToken({:?},{},{:?})", printable, min, opt_max)
}
}
}
}
impl NonFinalNode {
#[must_use]
pub fn reason(&self) -> String {
match self {
NonFinalNode::Escape => "incomplete escape sequence: `\\`".to_string(),
NonFinalNode::HexEscape0 => "incomplete escape sequence: `\\x`".to_string(),
NonFinalNode::HexEscape1(d) => {
format!("incomplete escape sequence: `\\x{}`", escape_ascii([*d]))
}
NonFinalNode::OpenClass0
| NonFinalNode::OpenClassNeg
| NonFinalNode::OpenClass(..)
| NonFinalNode::ByteRange(..) => "missing closing `]`".to_string(),
NonFinalNode::OpenByteRange(b) => {
format!("missing byte to close range: `{}-`", escape_ascii([*b]))
}
NonFinalNode::OpenGroup
| NonFinalNode::OpenExtendedGroup
| NonFinalNode::OpenNonCapturingGroup => "missing closing `)`".to_string(),
NonFinalNode::OpenAlt(_) => "missing element after bar `|`".to_string(),
NonFinalNode::RepeatMin(min) => {
format!("missing closing `}}` symbol: `{{{}`", min)
}
NonFinalNode::RepeatMax(min, max) => {
format!("missing closing `}}` symbol: `{{{},{}`", min, max)
}
NonFinalNode::RepeatToken(printable, _, _) => {
format!("missing element before repeat element: `{}`", printable)
}
}
}
#[allow(clippy::must_use_candidate)]
#[allow(clippy::missing_panics_doc)]
pub fn unwrap_open_class(self) -> (bool, Vec<ClassItem>) {
match self {
NonFinalNode::OpenClass(incl, items) => (incl, items),
other => panic!("unwrap_open_class() called on value: {:?}", other),
}
}
#[allow(clippy::must_use_candidate)]
#[allow(clippy::missing_panics_doc)]
pub fn unwrap_open_alt(self) -> Vec<FinalNode> {
match self {
NonFinalNode::OpenAlt(nodes) => nodes,
other => panic!("unwrap_open_alt() called on value: {:?}", other),
}
}
#[allow(clippy::must_use_candidate)]
#[allow(clippy::missing_panics_doc)]
pub fn unwrap_repeat_min(self) -> String {
match self {
NonFinalNode::RepeatMin(min) => min,
other => panic!("unwrap_repeat_min() called on value: {:?}", other),
}
}
#[allow(clippy::must_use_candidate)]
#[allow(clippy::missing_panics_doc)]
pub fn unwrap_repeat_max(self) -> (String, String) {
match self {
NonFinalNode::RepeatMax(min, max) => (min, max),
other => panic!("unwrap_repeat_max() called on value: {:?}", other),
}
}
}
#[derive(Clone, PartialOrd, PartialEq)]
pub enum FinalNode {
Byte(u8),
AnyByte,
Seq(Vec<FinalNode>),
Class( bool, Vec<ClassItem>),
Group(Box<FinalNode>),
NonCapturingGroup(Box<FinalNode>),
Alt(Vec<FinalNode>),
Repeat(Box<FinalNode>, usize, Option<usize>),
}
impl FinalNode {
#[allow(clippy::must_use_candidate)]
#[allow(clippy::missing_panics_doc)]
pub fn unwrap_alt(self) -> Vec<FinalNode> {
match self {
FinalNode::Alt(nodes) => nodes,
other => panic!("unwrap_alt() called on value: {:?}", other),
}
}
}
impl core::fmt::Debug for FinalNode {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> {
match self {
FinalNode::Byte(b) => write!(f, "Byte({})", escape_ascii([*b])),
FinalNode::AnyByte => write!(f, "AnyByte"),
FinalNode::Seq(nodes) => write!(f, "Seq{:?}", nodes),
FinalNode::Class(true, items) => write!(f, "Class{:?}", items),
FinalNode::Class(false, items) => write!(f, "Class^{:?}", items),
FinalNode::Group(nodes) => write!(f, "Group({:?})", nodes),
FinalNode::NonCapturingGroup(nodes) => write!(f, "NonCapturingGroup({:?})", nodes),
FinalNode::Alt(nodes) => write!(f, "Alt{:?}", nodes),
FinalNode::Repeat(node, min, opt_max) => {
write!(f, "Repeat({:?},{}-{:?})", node, min, opt_max)
}
}
}
}
#[allow(clippy::too_many_lines)]
fn apply_rule_once(
mut prev: &mut Option<Node>,
mut last: &mut Option<Node>,
byte: &mut Option<u8>,
) -> Result<Option<Node>, String> {
use FinalNode::{Alt, AnyByte, Byte, Class, Group, NonCapturingGroup, Repeat, Seq};
use Node::{Final, NonFinal};
use NonFinalNode::{
ByteRange, Escape, HexEscape0, HexEscape1, OpenAlt, OpenByteRange, OpenClass, OpenClass0,
OpenClassNeg, OpenExtendedGroup, OpenGroup, OpenNonCapturingGroup, RepeatMax, RepeatMin,
RepeatToken,
};
#[allow(clippy::match_same_arms, clippy::unnested_or_patterns)]
match (&mut prev, &mut last, byte.map(|b| b)) {
(Some(_), None, _) => unreachable!(),
(Some(NonFinal(OpenByteRange(a))), Some(Final(Byte(b))), _) => {
let node = NonFinal(ByteRange(*a, *b));
prev.take();
last.take();
Ok(Some(node))
}
(Some(NonFinal(OpenClass0)), Some(Final(Byte(b))), _) => {
let node = NonFinal(OpenClass(true, vec![ClassItem::Byte(*b)]));
prev.take();
last.take();
Ok(Some(node))
}
(Some(NonFinal(OpenClassNeg)), Some(Final(Byte(b))), _) => {
let node = NonFinal(OpenClass(false, vec![ClassItem::Byte(*b)]));
prev.take();
last.take();
Ok(Some(node))
}
(Some(NonFinal(OpenClass(_, items))), Some(Final(Byte(b))), _) => {
let item = ClassItem::Byte(*b);
last.take();
items.push(item);
Ok(None)
}
(Some(NonFinal(OpenClass(_, items))), Some(NonFinal(ByteRange(a, b))), _) => {
let item = ClassItem::ByteRange(*a, *b);
last.take();
items.push(item);
Ok(None)
}
(None, Some(NonFinal(RepeatToken(printable, _, _))), _)
| (Some(NonFinal(_)), Some(NonFinal(RepeatToken(printable, _, _))), _) => Err(format!(
"missing element before repeat element: `{}`",
printable
)),
(Some(Final(Seq(items))), Some(NonFinal(RepeatToken(_, min, opt_max))), _) => {
let min_copy = *min;
let opt_max_copy = *opt_max;
last.take();
let last_item = items.pop().unwrap();
items.push(Repeat(Box::new(last_item), min_copy, opt_max_copy));
Ok(None)
}
(Some(Final(_)), Some(NonFinal(RepeatToken(_, min, opt_max))), _) => {
let inner = prev.take().unwrap().unwrap_final();
let node = Final(Repeat(Box::new(inner), *min, *opt_max));
last.take();
Ok(Some(node))
}
(Some(NonFinal(OpenGroup)), Some(Final(_)), None) => Err(OpenGroup.reason()),
(Some(NonFinal(OpenExtendedGroup)), Some(Final(_)), None) => {
Err(OpenExtendedGroup.reason())
}
(Some(NonFinal(OpenNonCapturingGroup)), Some(Final(_)), None) => {
Err(OpenNonCapturingGroup.reason())
}
(Some(Final(Seq(nodes))), Some(Final(_)), _) => {
let node = last.take().unwrap().unwrap_final();
nodes.push(node);
Ok(None)
}
(Some(NonFinal(OpenAlt(_))), Some(Final(_)), b)
if b != Some(b'?') && b != Some(b'+') && b != Some(b'*') && b != Some(b'{') =>
{
let node = last.take().unwrap().unwrap_final();
let mut nodes = prev.take().unwrap().unwrap_non_final().unwrap_open_alt();
nodes.push(node);
Ok(Some(Final(Alt(nodes))))
}
(Some(Final(Alt(nodes))), Some(Final(_)), b)
if b != Some(b'?') && b != Some(b'+') && b != Some(b'*') && b != Some(b'{') =>
{
let node = last.take().unwrap().unwrap_final();
if let Seq(ref mut seq_nodes) = nodes.last_mut().unwrap() {
seq_nodes.push(node);
} else {
let prev_node = nodes.pop().unwrap();
nodes.push(Seq(vec![prev_node, node]));
}
Ok(None)
}
(Some(Final(_)), Some(Final(_)), b)
if b != Some(b'?') && b != Some(b'+') && b != Some(b'*') && b != Some(b'{') =>
{
Ok(Some(Final(Seq(vec![
prev.take().unwrap().unwrap_final(),
last.take().unwrap().unwrap_final(),
]))))
}
(_, Some(NonFinal(Escape)), Some(b'\\')) => {
last.take();
byte.take();
Ok(Some(Final(Byte(b'\\'))))
}
(_, _, Some(b'\\')) => {
byte.take();
Ok(Some(NonFinal(Escape)))
}
(_, Some(NonFinal(Escape)), Some(b'n')) => {
last.take();
byte.take();
Ok(Some(Final(Byte(b'\n'))))
}
(_, Some(NonFinal(Escape)), Some(b'r')) => {
last.take();
byte.take();
Ok(Some(Final(Byte(b'\r'))))
}
(_, Some(NonFinal(Escape)), Some(b't')) => {
last.take();
byte.take();
Ok(Some(Final(Byte(b'\t'))))
}
(_, Some(NonFinal(Escape)), Some(b'0')) => {
last.take();
byte.take();
Ok(Some(Final(Byte(0))))
}
(_, Some(NonFinal(Escape)), Some(b)) if b"'\"?+.*^$|(){}[]".contains(&b) => {
let node = Final(Byte(b));
last.take();
byte.take();
Ok(Some(node))
}
(_, Some(NonFinal(Escape)), Some(b'x')) => {
last.take();
byte.take();
Ok(Some(NonFinal(HexEscape0)))
}
(_, Some(NonFinal(Escape)), Some(d)) => {
Err(format!("invalid escape sequence `\\{}`", escape_ascii([d])))
}
(_, Some(NonFinal(HexEscape0)), Some(d)) => {
last.take();
byte.take();
Ok(Some(NonFinal(HexEscape1(d))))
}
(_, Some(NonFinal(HexEscape1(d1))), Some(d0))
if d1.is_ascii_hexdigit() && d0.is_ascii_hexdigit() =>
{
let string = String::from_utf8(vec![*d1, d0]).unwrap();
let b = u8::from_str_radix(&string, 16).unwrap();
last.take();
byte.take();
Ok(Some(Final(Byte(b))))
}
(_, Some(NonFinal(HexEscape1(d1))), Some(d0)) => Err(format!(
"invalid escape sequence `\\x{}`",
escape_ascii([*d1, d0])
)),
(_, Some(NonFinal(OpenClass0)), Some(b'['))
| (_, Some(NonFinal(OpenClassNeg)), Some(b'['))
| (_, Some(NonFinal(OpenClass(..))), Some(b'[')) => {
byte.take();
Ok(Some(Final(Byte(b'['))))
}
(_, _, Some(b'[')) => {
byte.take();
Ok(Some(NonFinal(OpenClass0)))
}
(_, Some(NonFinal(OpenClass0)), Some(b'^')) => {
last.take();
byte.take();
Ok(Some(NonFinal(OpenClassNeg)))
}
(_, Some(NonFinal(OpenClass(_, ref mut items))), Some(b'-')) => {
byte.take();
match items.pop().unwrap() {
ClassItem::Byte(b) => Ok(Some(NonFinal(OpenByteRange(b)))),
ClassItem::ByteRange(a, b) => Err(format!(
"expected byte before '-' symbol, not range: `{}-{}-`",
escape_ascii([a]),
escape_ascii([b])
)),
}
}
(_, Some(NonFinal(OpenClass0)), Some(b']')) => {
last.take();
byte.take();
Ok(Some(Final(Class(true, Vec::new()))))
}
(_, Some(NonFinal(OpenClassNeg)), Some(b']')) => {
last.take();
byte.take();
Ok(Some(Final(Class(false, Vec::new()))))
}
(_, Some(NonFinal(OpenClass(..))), Some(b']')) => {
byte.take();
let (incl, items) = last.take().unwrap().unwrap_non_final().unwrap_open_class();
Ok(Some(Final(Class(incl, items))))
}
(Some(NonFinal(OpenClass(..))), Some(NonFinal(non_final)), Some(b']')) => {
Err(non_final.reason())
}
(_, Some(NonFinal(OpenClass0)), Some(b))
| (_, Some(NonFinal(OpenClassNeg)), Some(b))
| (_, Some(NonFinal(OpenClass(..))), Some(b))
if b != b']' =>
{
byte.take();
Ok(Some(Final(Byte(b))))
}
(_, _, Some(b'(')) => {
byte.take();
Ok(Some(NonFinal(OpenGroup)))
}
(_, Some(NonFinal(OpenGroup)), Some(b')')) => {
last.take();
byte.take();
Ok(Some(Final(Group(Box::new(Seq(vec![]))))))
}
(_, Some(NonFinal(OpenGroup)), Some(b'?')) => {
last.take();
byte.take();
Ok(Some(NonFinal(OpenExtendedGroup)))
}
(_, Some(NonFinal(OpenExtendedGroup)), Some(b':')) => {
last.take();
byte.take();
Ok(Some(NonFinal(OpenNonCapturingGroup)))
}
(_, Some(NonFinal(OpenExtendedGroup)), Some(_)) => {
Err("unexpected symbol after `(?`".to_string())
}
(_, Some(NonFinal(OpenNonCapturingGroup)), Some(b')')) => {
last.take();
byte.take();
Ok(Some(Final(NonCapturingGroup(Box::new(Seq(vec![]))))))
}
(Some(NonFinal(OpenGroup)), Some(NonFinal(non_final)), Some(b')')) => {
Err(non_final.reason())
}
(Some(NonFinal(OpenNonCapturingGroup)), Some(NonFinal(non_final)), Some(b')')) => {
Err(non_final.reason())
}
(Some(NonFinal(OpenGroup)), Some(Final(_)), Some(b')')) => {
byte.take();
let node = last.take().unwrap().unwrap_final();
prev.take();
Ok(Some(Final(Group(Box::new(node)))))
}
(Some(NonFinal(OpenNonCapturingGroup)), Some(Final(_)), Some(b')')) => {
byte.take();
let node = last.take().unwrap().unwrap_final();
prev.take();
Ok(Some(Final(NonCapturingGroup(Box::new(node)))))
}
(_, _, Some(b'?')) => {
byte.take();
Ok(Some(NonFinal(RepeatToken("?".to_string(), 0, Some(1)))))
}
(_, _, Some(b'*')) => {
byte.take();
Ok(Some(NonFinal(RepeatToken("*".to_string(), 0, None))))
}
(_, _, Some(b'+')) => {
byte.take();
Ok(Some(NonFinal(RepeatToken("+".to_string(), 1, None))))
}
(_, _, Some(b'{')) => {
byte.take();
Ok(Some(NonFinal(RepeatMin(String::new()))))
}
(_, Some(NonFinal(RepeatMin(_))), Some(b',')) => {
byte.take();
let min = last.take().unwrap().unwrap_non_final().unwrap_repeat_min();
Ok(Some(NonFinal(RepeatMax(min, String::new()))))
}
(_, Some(NonFinal(RepeatMin(_))), Some(b'}')) => {
byte.take();
let min = last.take().unwrap().unwrap_non_final().unwrap_repeat_min();
let min_usize = min
.parse::<usize>()
.map_err(|e| format!("invalid repetition value `{{{}}}`: {}", min, e))?;
Ok(Some(NonFinal(RepeatToken(
format!("{{{}}}", min),
min_usize,
Some(min_usize),
))))
}
(_, Some(NonFinal(RepeatMin(min))), Some(b)) => {
byte.take();
min.push(char::from(b));
Ok(None)
}
(_, Some(NonFinal(RepeatMax(..))), Some(b'}')) => {
byte.take();
let (min, max) = last.take().unwrap().unwrap_non_final().unwrap_repeat_max();
let min_usize = if min.is_empty() {
0
} else {
min.parse::<usize>()
.map_err(|e| format!("invalid repetition value `{{{},{}}}`: {}", min, max, e))?
};
let max_opt_usize = if max.is_empty() {
None
} else {
let max_usize = max.parse::<usize>().map_err(|e| {
format!("invalid repetition value `{{{},{}}}`: {}", min, max, e)
})?;
if max_usize < min_usize {
return Err(format!(
"repeating element has max that is smaller than min: `{{{},{}}}`",
min, max
));
}
Some(max_usize)
};
Ok(Some(NonFinal(RepeatToken(
format!("{{{},{}}}", min, max),
min_usize,
max_opt_usize,
))))
}
(_, Some(NonFinal(RepeatMax(_, ref mut max))), Some(b)) => {
max.push(char::from(b));
byte.take();
Ok(None)
}
(_, _, Some(b'.')) => {
byte.take();
Ok(Some(Final(AnyByte)))
}
(_, Some(Final(Alt(_))), Some(b'|')) => {
byte.take();
let nodes = last.take().unwrap().unwrap_final().unwrap_alt();
Ok(Some(NonFinal(OpenAlt(nodes))))
}
(_, Some(Final(_)), Some(b'|')) => {
byte.take();
let node = last.take().unwrap().unwrap_final();
Ok(Some(NonFinal(OpenAlt(vec![node]))))
}
(_, None, Some(b'|')) => Err("missing element before bar `|`".to_string()),
(_, _, Some(b)) => {
byte.take();
Ok(Some(Final(Byte(b))))
}
(_, Some(NonFinal(node)), None) => Err(node.reason()),
(None, None, None) => unreachable!(),
(None, Some(Final(_)), None) => unreachable!(),
(Some(NonFinal(Escape)), Some(Final(_)), _) => unreachable!(),
(Some(NonFinal(HexEscape0)), Some(Final(_)), _) => unreachable!(),
(Some(NonFinal(HexEscape1(_))), Some(Final(_)), _) => unreachable!(),
(Some(NonFinal(RepeatMin(..))), Some(Final(_)), _) => unreachable!(),
(Some(NonFinal(RepeatMax(..))), Some(Final(_)), _) => unreachable!(),
(Some(NonFinal(OpenClass0)), Some(Final(_)), _) => unreachable!(),
(Some(NonFinal(OpenClassNeg)), Some(Final(_)), _) => unreachable!(),
(Some(NonFinal(OpenClass(..))), Some(Final(_)), _) => unreachable!(),
(Some(NonFinal(OpenAlt(_))), Some(Final(_)), _) => unreachable!(),
(Some(NonFinal(OpenByteRange(_))), Some(Final(_)), _) => unreachable!(),
(Some(NonFinal(ByteRange(..))), Some(Final(_)), _) => unreachable!(),
(Some(NonFinal(RepeatToken(..))), Some(Final(_)), _) => unreachable!(),
(Some(Final(_)), Some(Final(_)), _) => unreachable!(),
}
}
#[allow(clippy::missing_panics_doc)]
pub fn parse(regex: &[u8]) -> Result<FinalNode, String> {
if regex.is_empty() {
return Ok(FinalNode::Seq(Vec::new()));
}
let mut data_iter = regex.iter().copied().peekable();
let mut stack: Vec<Node> = Vec::new();
while data_iter.peek().is_some() || stack.len() > 1 {
crate::dprintln!(
"process {:?} next={:?}",
stack,
data_iter.peek().map(|b| escape_ascii([*b]))
);
let mut byte = data_iter.peek().copied();
let byte_was_some = byte.is_some();
let mut last = stack.pop();
let mut prev = stack.pop();
let mut to_push = apply_rule_once(&mut prev, &mut last, &mut byte)?;
if let Some(node) = prev.take() {
stack.push(node);
}
if let Some(node) = last.take() {
stack.push(node);
}
if let Some(node) = to_push.take() {
stack.push(node);
}
if byte_was_some && byte.is_none() {
data_iter.next().unwrap();
}
}
crate::dprintln!("stack {:?}", stack);
for node in stack.iter().rev() {
if let Node::NonFinal(non_final) = node {
return Err(non_final.reason());
}
}
assert_eq!(1, stack.len());
Ok(stack.pop().unwrap().unwrap_final())
}