use std::cmp::Ordering;
use std::collections::HashMap;
use std::fmt;
use std::mem;
use std::ops::Deref;
use std::slice;
use std::sync::Arc;
use input::Char;
use literal::LiteralSearcher;
/// Index of an instruction within a program's instruction sequence.
///
/// Values of this type are used to index a `Program` (which dereferences to
/// its instruction slice) and are stored in the `goto` fields of
/// instructions.
pub type InstPtr = usize;
/// A compiled program: a sequence of instructions plus the metadata that
/// travels with it.
///
/// `Program` dereferences to `[Inst]`, so it can be indexed and iterated
/// like a slice of instructions.
#[derive(Clone)]
pub struct Program {
/// The instruction sequence. This is the slice exposed through `Deref`.
pub insts: Vec<Inst>,
/// Instruction pointers associated with matches. `leads_to_match` always
/// answers `false` when this holds more than one entry.
/// NOTE(review): presumably the locations of the `Match` instructions --
/// confirm against the code that builds programs.
pub matches: Vec<InstPtr>,
/// Optional names, one entry per element.
/// NOTE(review): presumably one entry per capture group, `None` for
/// unnamed groups -- confirm against the code that builds programs.
pub captures: Vec<Option<String>>,
/// Shared map from a name to an index.
/// NOTE(review): presumably capture-group name to capture index -- confirm.
pub capture_name_idx: Arc<HashMap<String, usize>>,
/// The instruction pointer labelled `(start)` in the `Debug` output.
/// NOTE(review): presumably where execution begins -- confirm.
pub start: InstPtr,
/// Byte table; `Program::new` initializes it to 256 zero entries.
/// NOTE(review): presumably maps each byte value to an equivalence class
/// -- confirm.
pub byte_classes: Vec<u8>,
/// Value reported by `Program::only_utf8`; `true` in a new program.
/// NOTE(review): presumably whether only valid UTF-8 can match -- confirm.
pub only_utf8: bool,
/// When set, `uses_bytes` reports `true`.
pub is_bytes: bool,
/// When set, `uses_bytes` reports `true`; also one of the conditions
/// checked by `needs_dotstar`.
pub is_dfa: bool,
/// When set, `needs_dotstar` reports `false`.
/// NOTE(review): presumably marks a program that matches in reverse.
pub is_reverse: bool,
/// When set, `needs_dotstar` reports `false`.
/// NOTE(review): presumably the pattern is anchored at the start.
pub is_anchored_start: bool,
/// NOTE(review): presumably the pattern is anchored at the end; not read
/// anywhere in this part of the file.
pub is_anchored_end: bool,
/// NOTE(review): presumably set when the program contains a Unicode word
/// boundary assertion; not read anywhere in this part of the file.
pub has_unicode_word_boundary: bool,
/// Literal searcher attached to the program; empty in a new program and
/// included in `approximate_size`.
pub prefixes: LiteralSearcher,
/// Size limit; defaults to `2 * (1 << 20)` in `Program::new`.
/// NOTE(review): presumably a byte budget for a DFA cache -- confirm.
pub dfa_size_limit: usize,
}
impl Program {
    /// Creates an empty program with default settings.
    ///
    /// The result has no instructions, matches or captures, starts at
    /// instruction 0, carries a 256-entry zeroed `byte_classes` table, has
    /// `only_utf8` set and every other flag cleared, uses an empty literal
    /// searcher and a `dfa_size_limit` of `2 * (1 << 20)`.
    pub fn new() -> Self {
        Program {
            insts: Vec::new(),
            matches: Vec::new(),
            captures: Vec::new(),
            capture_name_idx: Arc::new(HashMap::new()),
            start: 0,
            byte_classes: vec![0u8; 256],
            only_utf8: true,
            is_bytes: false,
            is_dfa: false,
            is_reverse: false,
            is_anchored_start: false,
            is_anchored_end: false,
            has_unicode_word_boundary: false,
            prefixes: LiteralSearcher::empty(),
            dfa_size_limit: 2 * (1 << 20),
        }
    }

    /// Follows `Save` instructions starting at `pc` and returns the pointer
    /// of the first instruction that is not a `Save`.
    ///
    /// Panics if a visited pointer is out of bounds.
    pub fn skip(&self, mut pc: usize) -> usize {
        while let Inst::Save(ref save) = self[pc] {
            pc = save.goto;
        }
        pc
    }

    /// Returns true when the program has at most one entry in `matches` and
    /// the first non-`Save` instruction reachable from `pc` is a `Match`.
    pub fn leads_to_match(&self, pc: usize) -> bool {
        // With more than one match entry the answer is always `false`, and
        // the instruction at `pc` is never inspected.
        self.matches.len() <= 1 && self[self.skip(pc)].is_match()
    }

    /// Returns true for a DFA program that is neither reversed nor anchored
    /// at the start.
    pub fn needs_dotstar(&self) -> bool {
        self.is_dfa && !(self.is_reverse || self.is_anchored_start)
    }

    /// Returns true when either `is_bytes` or `is_dfa` is set.
    pub fn uses_bytes(&self) -> bool {
        self.is_dfa || self.is_bytes
    }

    /// Returns the value of the `only_utf8` flag.
    pub fn only_utf8(&self) -> bool {
        self.only_utf8
    }

    /// Returns a rough estimate, in bytes, of the memory used by this
    /// program.
    ///
    /// Each collection contributes its length times the inline size of its
    /// element type; the literal searcher contributes whatever its own
    /// `approximate_size` reports. Heap data owned by individual elements
    /// (for example the contents of capture names) is not counted.
    pub fn approximate_size(&self) -> usize {
        let insts = self.insts.len() * mem::size_of::<Inst>();
        let matches = self.matches.len() * mem::size_of::<InstPtr>();
        let captures = self.captures.len() * mem::size_of::<Option<String>>();
        let name_entry = mem::size_of::<String>() + mem::size_of::<usize>();
        let name_idx = self.capture_name_idx.len() * name_entry;
        let byte_classes = self.byte_classes.len() * mem::size_of::<u8>();
        let prefixes = self.prefixes.approximate_size();
        insts + matches + captures + name_idx + byte_classes + prefixes
    }
}
/// Lets a `Program` be used wherever a slice of instructions is expected
/// (indexing, `len`, `iter`, ...).
impl Deref for Program {
    type Target = [Inst];

    /// Borrows the program's instructions as a slice.
    #[cfg_attr(feature = "perf-inline", inline(always))]
    fn deref(&self) -> &Self::Target {
        self.insts.as_slice()
    }
}
/// Prints the program as a listing with one instruction per line.
///
/// Each line starts with the instruction pointer zero-padded to four digits,
/// followed by a description of the instruction. For every instruction kind
/// except `Match` and `Split`, ` (goto: N)` is appended when the target is
/// not simply the next instruction. The line for the instruction at
/// `self.start` ends with ` (start)`.
impl fmt::Debug for Program {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use self::Inst::*;

        /// Appends the goto annotation to `fmtd` unless control falls
        /// through to `cur + 1`.
        fn with_goto(cur: usize, goto: usize, fmtd: String) -> String {
            if goto != cur + 1 {
                return format!("{} (goto: {})", fmtd, goto);
            }
            fmtd
        }

        /// Renders a byte using the ASCII escaping of
        /// `std::ascii::escape_default`.
        fn visible_byte(b: u8) -> String {
            use std::ascii::escape_default;
            let escaped: Vec<u8> = escape_default(b).collect();
            String::from_utf8_lossy(&escaped).into_owned()
        }

        for (pc, inst) in self.insts.iter().enumerate() {
            // Everything that follows the "NNNN " prefix on this line.
            let description = match *inst {
                Match(slot) => format!("Match({:?})", slot),
                Save(ref save) => {
                    with_goto(pc, save.goto, format!("Save({})", save.slot))
                }
                Split(ref split) => {
                    format!("Split({}, {})", split.goto1, split.goto2)
                }
                EmptyLook(ref look) => {
                    with_goto(pc, look.goto, format!("{:?}", look.look))
                }
                Char(ref ch) => with_goto(pc, ch.goto, format!("{:?}", ch.c)),
                Ranges(ref class) => {
                    let mut listed = String::new();
                    for (i, range) in class.ranges.iter().enumerate() {
                        if i > 0 {
                            listed.push_str(", ");
                        }
                        listed.push_str(&format!(
                            "{:?}-{:?}",
                            range.0, range.1
                        ));
                    }
                    with_goto(pc, class.goto, listed)
                }
                Bytes(ref bytes) => {
                    let shown = format!(
                        "Bytes({}, {})",
                        visible_byte(bytes.start),
                        visible_byte(bytes.end)
                    );
                    with_goto(pc, bytes.goto, shown)
                }
            };
            write!(f, "{:04} {}", pc, description)?;
            if pc == self.start {
                f.write_str(" (start)")?;
            }
            f.write_str("\n")?;
        }
        Ok(())
    }
}
/// Allows `for inst in &program`, yielding the instructions in order.
impl<'a> IntoIterator for &'a Program {
    type Item = &'a Inst;
    type IntoIter = slice::Iter<'a, Inst>;

    /// Returns an iterator over the program's instructions.
    fn into_iter(self) -> Self::IntoIter {
        self.insts.iter()
    }
}
/// A single instruction of a compiled program.
#[derive(Clone, Debug)]
pub enum Inst {
    /// A match instruction. The payload is printed as `Match(n)` in the
    /// program's `Debug` listing.
    /// NOTE(review): presumably identifies which pattern matched -- confirm.
    Match(usize),
    /// Carries a slot number and the pointer of the next instruction.
    /// `Program::skip` steps over instructions of this kind.
    Save(InstSave),
    /// Carries two target instruction pointers.
    Split(InstSplit),
    /// Carries an `EmptyLook` kind and the pointer of the next instruction.
    EmptyLook(InstEmptyLook),
    /// Carries a single `char` and the pointer of the next instruction.
    Char(InstChar),
    /// Carries a list of inclusive `char` ranges and the pointer of the next
    /// instruction.
    Ranges(InstRanges),
    /// Carries an inclusive byte range and the pointer of the next
    /// instruction.
    Bytes(InstBytes),
}

impl Inst {
    /// Returns true if and only if this is the `Match` variant.
    pub fn is_match(&self) -> bool {
        if let Inst::Match(_) = *self {
            true
        } else {
            false
        }
    }
}
/// Operands of a `Save` instruction.
#[derive(Clone, Debug)]
pub struct InstSave {
/// Pointer of the next instruction; this is what `Program::skip` follows.
pub goto: InstPtr,
/// Slot number, printed as `Save(slot)` in the program's `Debug` listing.
/// NOTE(review): presumably the capture slot that receives the current
/// input position -- confirm against the matching engines.
pub slot: usize,
}
/// Operands of a `Split` instruction: two target instruction pointers.
///
/// NOTE(review): presumably `goto1` is the preferred branch and `goto2` the
/// alternative -- confirm against the matching engines.
#[derive(Clone, Debug)]
pub struct InstSplit {
/// First target; printed first in `Split(goto1, goto2)`.
pub goto1: InstPtr,
/// Second target; printed second in `Split(goto1, goto2)`.
pub goto2: InstPtr,
}
/// Operands of an `EmptyLook` instruction.
#[derive(Clone, Debug)]
pub struct InstEmptyLook {
/// Pointer of the next instruction.
pub goto: InstPtr,
/// Which kind of look this instruction carries.
pub look: EmptyLook,
}
/// The kinds of look carried by an `EmptyLook` instruction.
///
/// NOTE(review): the code that evaluates these is not in this part of the
/// file; the per-variant descriptions below are inferred from the variant
/// names and should be confirmed against the matching engines.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EmptyLook {
/// Presumably: start of a line.
StartLine,
/// Presumably: end of a line.
EndLine,
/// Presumably: start of the input text.
StartText,
/// Presumably: end of the input text.
EndText,
/// Presumably: a word boundary.
WordBoundary,
/// Presumably: not a word boundary.
NotWordBoundary,
/// Presumably: a word boundary using ASCII word characters only.
WordBoundaryAscii,
/// Presumably: not a word boundary, using ASCII word characters only.
NotWordBoundaryAscii,
}
/// Operands of a `Char` instruction.
#[derive(Clone, Debug)]
pub struct InstChar {
/// Pointer of the next instruction.
pub goto: InstPtr,
/// The character carried by this instruction.
pub c: char,
}
/// Operands of a `Ranges` instruction: a list of inclusive character ranges.
#[derive(Clone, Debug)]
pub struct InstRanges {
    /// Pointer of the next instruction.
    pub goto: InstPtr,
    /// Inclusive `(start, end)` character ranges. `matches` relies on these
    /// being sorted in ascending order and non-overlapping.
    pub ranges: Vec<(char, char)>,
}

impl InstRanges {
    /// Tests whether `c` falls inside any of the ranges.
    ///
    /// The first four ranges are checked with a linear scan; if that scan
    /// is inconclusive, the whole list is binary searched.
    pub fn matches(&self, c: Char) -> bool {
        for &(lo, hi) in self.ranges.iter().take(4) {
            // The scan is ascending, so a character below this range's
            // start cannot belong to this or any later range.
            if c < lo {
                return false;
            }
            if c <= hi {
                return true;
            }
        }
        let found = self.ranges.binary_search_by(|&(lo, hi)| {
            if hi < c {
                Ordering::Less
            } else if lo > c {
                Ordering::Greater
            } else {
                Ordering::Equal
            }
        });
        found.is_ok()
    }

    /// Returns the total number of characters covered by all ranges,
    /// counting both endpoints of each range.
    pub fn num_chars(&self) -> usize {
        let total = self
            .ranges
            .iter()
            .fold(0u32, |acc, &(s, e)| acc + (1 + (e as u32) - (s as u32)));
        total as usize
    }
}
/// Operands of a `Bytes` instruction: an inclusive range of byte values.
#[derive(Clone, Debug)]
pub struct InstBytes {
    /// Pointer of the next instruction.
    pub goto: InstPtr,
    /// Lowest byte value in the range (inclusive).
    pub start: u8,
    /// Highest byte value in the range (inclusive).
    pub end: u8,
}

impl InstBytes {
    /// Returns true if and only if `byte` lies within `start..=end`.
    pub fn matches(&self, byte: u8) -> bool {
        byte >= self.start && self.end >= byte
    }
}