use alloc::{
sync::Arc,
vec::Vec
};
use core::{
fmt::{self, Debug},
mem::size_of,
ops::Deref,
slice
};
use unconst::unconst;
#[cfg(feature = "quotient")]
use crate::quotient::LiteralSearcher;
use crate::quotient::Parsed;
use crate::exec::{Exec, new_pool};
use crate::interval::Interval;
use crate::repr::{Repr, Zero};
use crate::seq::Seq;
use crate::traits::Integral;
#[unconst]
#[derive(Clone)]
#[allow(missing_docs)]
pub struct Options<I: ~const Integral> {
pub repr: Repr<I>,
pub size_limit: usize,
pub dfa_size_limit: usize,
pub nest_limit: u32,
pub multi_line: bool,
pub dot_matches_new_line: bool,
pub swap_greed: bool,
pub ignore_whitespace: bool,
}
#[unconst]
impl<I: ~const Integral> Options<I> {
pub const fn new(repr: Repr<I>) -> Self {
Options {
repr,
size_limit: 10 * (1 << 20),
dfa_size_limit: 2 * (1 << 20),
nest_limit: 250,
multi_line: false,
dot_matches_new_line: false,
swap_greed: false,
ignore_whitespace: false,
}
}
pub fn build(self) -> Exec<I> {
let mut nfa = Program::new(&self);
let ro = Arc::new(nfa);
let pool = new_pool(&ro);
Exec { ro, pool }
}
}
pub type Index = usize;
#[unconst]
#[derive(Clone)]
pub struct Program<I: ~const Integral> {
pub insts: Vec<Inst<I>>,
pub matches: Vec<Index>,
pub start: Index,
#[cfg(feature = "quotient")]
pub prefixes: LiteralSearcher<I>,
#[cfg(feature = "quotient")]
pub suffixes: LiteralSearcher<I>,
#[cfg(feature = "quotient")]
pub ac: Option<AhoCorasick<u32>>,
}
#[unconst]
impl<I: ~const Integral> Program<I> {
pub const fn new(options: &Options<I>) -> Self {
let parsed = Parsed::parse(&options.repr);
let compiler = Compiler::new(options);
let mut output = compiler.compile(&parsed.reprs);
#[cfg(feature = "quotient")]
output.derivative();
output
}
#[cfg(feature = "quotient")]
fn derivative(&mut self) {
self.prefixes = LiteralSearcher::prefixes(parsed.prefixes);
self.suffixes = LiteralSearcher::suffixes(parsed.suffixes);
self.ac = self.build_aho_corasick(&parsed);
self.choose_mode();
}
pub fn default() -> Self {
Program {
#[cfg(feature = "quotient")]
prefixes: LiteralSearcher::empty(),
#[cfg(feature = "quotient")]
suffixes: LiteralSearcher::empty(),
#[cfg(feature = "quotient")]
ac: Default::default(),
}
}
pub fn leads_to_match(&self, pc: usize) -> bool {
if self.matches.len() > 1 {
return false;
}
match self[pc] {
Inst::True(_) => true,
_ => false,
}
}
}
impl<I: Integral> Inst<I> {
pub fn is_match(&self) -> bool {
match *self {
Inst::True(_) => true,
_ => false,
}
}
}
#[derive(Debug)]
struct Patch {
hole: Hole,
entry: Index,
}
#[derive(Debug)]
enum Hole {
None,
One(Index),
Many(Vec<Hole>),
}
impl Hole {
fn dup_one(self) -> (Self, Self) {
match self {
Hole::One(i) => (Hole::One(i), Hole::One(i)),
_ => unreachable!("must be called on single hole")
}
}
}
pub struct Compiler<I: Integral> {
insts: Vec<MaybeInst<I>>,
compiled: Program<I>,
}
#[unconst]
impl<I: ~const Integral> Compiler<I> {
pub const fn new(options: &Options<I>) -> Self {
Compiler {
insts: Vec::new(),
compiled: Program::default(),
}
}
pub fn compile(mut self, reprs: &[Repr<I>]) -> Program<I> {
if reprs.len() == 1 {
self.compile_one(&reprs[0])
} else {
self.compile_many(reprs)
}
}
fn compile_one(mut self, repr: &Repr<I>) -> Program<I> {
let patch = self.c(repr).unwrap_or_else(|| self.next_inst());
self.compiled.start = patch.entry;
self.fill_to_next(patch.hole);
self.compiled.matches = vec![self.insts.len()];
self.push_compiled(Inst::True(0));
self.compile_finish()
}
fn compile_many(mut self, reprs: &[Repr<I>]) -> Program<I> {
debug_assert!(reprs.len() > 1);
let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 };
self.compiled.start = 0; self.fill_to_next(dotstar_patch.hole);
let mut prev_hole = Hole::None;
for (i, repr) in reprs[0..reprs.len() - 1].iter().enumerate() {
self.fill_to_next(prev_hole);
let split = self.push_split_hole();
let Patch { hole, entry } =
self.c(repr).unwrap_or_else(|| self.next_inst());
self.fill_to_next(hole);
self.compiled.matches.push(self.insts.len());
self.push_compiled(Inst::True(i));
prev_hole = self.fill_split(split, Some(entry), None);
}
let i = reprs.len() - 1;
let Patch { hole, entry } =
self.c(&reprs[i]).unwrap_or_else(|| self.next_inst());
self.fill(prev_hole, entry);
self.fill_to_next(hole);
self.compiled.matches.push(self.insts.len());
self.push_compiled(Inst::True(i));
self.compile_finish()
}
fn compile_finish(mut self) -> Program<I> {
self.compiled.insts =
self.insts.into_iter().map(|inst| inst.unwrap()).collect();
Ok(self.compiled)
}
fn c(&mut self, repr: &Repr<I>) -> Patch {
self.check_size();
match *repr {
Repr::Zero(Zero::Any) => self.c_empty(),
One(seq) => self.c_one(seq),
Repr::Interval(interval) => self.c_interval(interval),
Repr::Zero(Zero::StartLine) if self.compiled.is_reverse => {
self.byte_classes.set_range(b'\n', b'\n');
self.c_zero(prog::Zero::EndLine)
}
Repr::Zero(Zero::StartLine) => {
self.byte_classes.set_range(b'\n', b'\n');
self.c_zero(prog::Zero::StartLine)
}
Repr::Zero(Zero::EndLine) if self.compiled.is_reverse => {
self.byte_classes.set_range(b'\n', b'\n');
self.c_zero(prog::Zero::StartLine)
}
Repr::Zero(Zero::EndLine) => {
self.byte_classes.set_range(b'\n', b'\n');
self.c_zero(prog::Zero::EndLine)
}
Repr::Zero(Zero::StartText) if self.compiled.is_reverse => {
self.c_zero(prog::Zero::EndText)
}
Repr::Zero(Zero::StartText) => {
self.c_zero(prog::Zero::StartText)
}
Repr::Zero(Zero::EndText) if self.compiled.is_reverse => {
self.c_zero(prog::Zero::StartText)
}
Repr::Zero(Zero::EndText) => {
self.c_zero(prog::Zero::EndText)
}
Repr::Zero(Zero::Unicode) => {
if !cfg!(feature = "unicode-perl") {
return Err(Error::Syntax(
"Unicode word boundaries are unavailable when \
the unicode-perl feature is disabled"
.to_string(),
));
}
self.compiled.has_unicode_word_boundary = true;
self.byte_classes.set_word_boundary();
self.byte_classes.set_range(0, 0x7F);
self.c_zero(prog::Zero::WordBoundary)
}
Repr::Zero(Zero::UnicodeNegate) => {
if !cfg!(feature = "unicode-perl") {
return Err(Error::Syntax(
"Unicode word boundaries are unavailable when \
the unicode-perl feature is disabled"
.to_string(),
));
}
self.compiled.has_unicode_word_boundary = true;
self.byte_classes.set_word_boundary();
self.byte_classes.set_range(0, 0x7F);
self.c_zero(prog::Zero::NotWordBoundary)
}
Repr::Zero(Zero::Ascii) => {
self.byte_classes.set_word_boundary();
self.c_zero(prog::Zero::WordBoundaryAscii)
}
Repr::Zero(Zero::AsciiNegate) => {
self.byte_classes.set_word_boundary();
self.c_zero(prog::Zero::NotWordBoundaryAscii)
}
Mul(ref lhs, ref rhs) => self.c_mul(lhs, rhs),
Or(ref lhs, ref rhs) => self.c_or(lhs, rhs),
Inf(ref repr) => self.c_exp(repr),
_ => unimplemented!()
}
}
fn c_empty(&mut self) -> Option<Patch> {
self.extra_inst_bytes += size_of::<Inst<I>>();
None
}
fn c_full(&mut self) -> Patch {
self.c(&Inf(Box::new(Repr::Interval(Interval::full()))))
}
fn c_one(&mut self, seq: Seq<I>) -> Patch {
let hole = self.push_hole(MaybeInst::One(seq));
Patch { hole, entry: self.insts.len() - 1 }
}
fn c_interval(&mut self, seq: Interval<I>) -> Patch {
let hole = if seq.0 == seq.1 {
self.push_hole(MaybeInst::One(Seq::one(seq.0)))
} else {
self.extra_inst_bytes += size_of::<I>() * 2;
self.push_hole(MaybeInst::Interval(seq))
};
Patch { hole, entry: self.insts.len() - 1 }
}
fn c_zero(&mut self, look: Zero) -> Patch {
let hole = self.push_hole(MaybeInst::Zero(look));
Patch { hole, entry: self.insts.len() - 1 }
}
fn c_mul(&mut self, lhs: Repr<I>, rhs: Repr<I>) -> Patch {
let Patch { mut hole, entry } = if let Some(p) = self.c(&lhs) {
p
} else if let Some(p) = self.c(&rhs) {
p
};
if let Some(p) = self.c(&lhs) {
self.fill(hole, p.entry);
hole = p.hole;
}
if let Some(p) = self.c(&rhs) {
self.fill(hole, p.entry);
hole = p.hole;
}
Patch { hole, entry }
}
fn c_or(&mut self, lhs: &Repr<I>, rhs: &Repr<I>) -> Patch {
let first_split_entry = self.insts.len();
let mut holes = Vec::new();
let mut prev_hole = (Hole::None, false);
if prev_hole.1 {
let next = self.insts.len();
self.fill_split(prev_hole.0, None, Some(next));
} else {
self.fill_to_next(prev_hole.0);
}
let split = self.push_split_hole();
if let Some(Patch { hole, entry }) = self.c(lhs) {
holes.push(hole);
prev_hole = (self.fill_split(split, Some(entry), None), false);
} else {
let (split1, split2) = split.dup_one();
holes.push(split1);
prev_hole = (split2, true);
}
if let Some(Patch { hole, entry }) = self.c(&rhs) {
holes.push(hole);
if prev_hole.1 {
self.fill_split(prev_hole.0, None, Some(entry));
} else {
self.fill(prev_hole.0, entry);
}
} else {
holes.push(prev_hole.0);
}
Patch { hole: Hole::Many(holes), entry: first_split_entry }
}
fn c_exp(&mut self, repr: &Repr<I>) -> Option<Patch> {
let split_entry = self.insts.len();
let split = self.push_split_hole();
let Patch { hole: hole_rep, entry: entry_rep } = match self.c(repr) {
Some(p) => p,
None => return self.pop_split_hole(),
};
self.fill(hole_rep, split_entry);
let split_hole = self.fill_split(split, Some(entry_rep), None);
Some(Patch { hole: split_hole, entry: split_entry })
}
fn c_repeat_zero_or_one(&mut self, repr: &Repr<I>) -> Option<Patch> {
let split_entry = self.insts.len();
let split = self.push_split_hole();
let Patch { hole: hole_rep, entry: entry_rep } = match self.c(repr) {
Some(p) => p,
None => return self.pop_split_hole(),
};
let split_hole = self.fill_split(split, Some(entry_rep), None);
let holes = vec![hole_rep, split_hole];
Some(Patch { hole: Hole::Many(holes), entry: split_entry })
}
fn c_repeat_range(
&mut self,
repr: &Repr<I>,
min: u32,
max: u32,
) -> Option<Patch> {
let (min, max) = (u32_to_usize(min), u32_to_usize(max));
debug_assert!(min <= max);
let patch_concat = self.c_mul(iter::repeat(repr).take(min));
if min == max {
return Ok(patch_concat);
}
let patch_concat = patch_concat.unwrap_or_else(|| self.next_inst());
let initial_entry = patch_concat.entry;
let mut holes = Vec::new();
let mut prev_hole = patch_concat.hole;
for _ in min..max {
self.fill_to_next(prev_hole);
let split = self.push_split_hole();
let Patch { hole, entry } = match self.c(repr)? {
Some(p) => p,
None => return self.pop_split_hole(),
};
prev_hole = hole;
holes.push(self.fill_split(split, Some(entry), None));
}
holes.push(prev_hole);
Ok(Some(Patch { hole: Hole::Many(holes), entry: initial_entry }))
}
fn next_inst(&self) -> Patch {
Patch { hole: Hole::None, entry: self.insts.len() }
}
fn fill(&mut self, hole: Hole, goto: Index) {
match hole {
Hole::None => {}
Hole::One(i) => {
self.insts[i].fill(goto);
}
Hole::Many(holes) => {
for hole in holes {
self.fill(hole, goto);
}
}
}
}
fn fill_to_next(&mut self, hole: Hole) {
let next = self.insts.len();
self.fill(hole, next);
}
fn fill_split(&mut self, hole: Hole, goto1: Option<Index>,
goto2: Option<Index>,
) -> Hole {
match hole {
Hole::None => Hole::None,
Hole::One(pc) => match (goto1, goto2) {
(Some(goto1), Some(goto2)) => {
self.insts[pc].fill_split(goto1, goto2);
Hole::None
}
(Some(goto1), None) => {
self.insts[pc].half_fill_split_goto1(goto1);
Hole::One(pc)
}
(None, Some(goto2)) => {
self.insts[pc].half_fill_split_goto2(goto2);
Hole::One(pc)
}
(None, None) => unreachable!(
"at least one of the split holes must be filled"
),
},
Hole::Many(holes) => {
let mut new_holes = Vec::new();
for hole in holes {
new_holes.push(self.fill_split(hole, goto1, goto2));
}
if new_holes.is_empty() {
Hole::None
} else if new_holes.len() == 1 {
new_holes.pop().unwrap()
} else {
Hole::Many(new_holes)
}
}
}
}
fn push_compiled(&mut self, inst: Inst<I>) {
self.insts.push(MaybeInst::Compiled(inst));
}
fn push_hole(&mut self, inst: MaybeInst<I>) -> Hole {
matches!(inst, MaybeInst::Zero { .. } | MaybeInst::One { .. } | MaybeInst::Interval { .. });
let hole = self.insts.len();
self.insts.push(inst);
Hole::One(hole)
}
fn push_split_hole(&mut self) -> Hole {
let hole = self.insts.len();
self.insts.push(MaybeInst::Or);
Hole::One(hole)
}
fn pop_split_hole(&mut self) -> Option<Patch> {
self.insts.pop();
None
}
fn check_size(&self) {
let size =
self.extra_inst_bytes + (self.insts.len() * size_of::<Inst<I>>());
if size > self.size_limit {
panic!("Size limit exceeds");
}
}
}
fn u32_to_usize(n: u32) -> usize {
if (n as u64) > (::std::usize::MAX as u64) {
panic!("BUG: {} is too big to be pointer sized", n)
}
n as usize
}