#![allow(ellipsis_inclusive_range_patterns)]
use std::prelude::v1::*;
use std::{cmp, fmt, mem, str};
#[cfg(feature = "std")]
use std::error;
pub const STACK_SIZE: usize = 4;
pub(crate) const PTR_SKIP: u8 = 0;
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct ParsePatError {
kind: PatError,
position: usize,
}
impl fmt::Display for ParsePatError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Syntax Error @{}: {}.", self.position, self.kind.to_str())
}
}
#[cfg(feature = "std")]
impl error::Error for ParsePatError {
fn description(&self) -> &str {
self.kind.to_str()
}
}
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum PatError {
UnpairedHexDigit,
UnknownChar,
ManyOverflow,
ManyRange,
ManyInvalid,
SaveOverflow,
StackError,
StackInvalid,
UnclosedQuote,
AlignedOperand,
ReadOperand,
SubPattern,
SubOverflow,
}
impl PatError {
fn to_str(self) -> &'static str {
match self {
PatError::UnpairedHexDigit => "unpaired hex digit",
PatError::UnknownChar => "unknown character",
PatError::ManyOverflow => "many range exceeded",
PatError::ManyRange => "many bounds nonsensical",
PatError::ManyInvalid => "many invalid syntax",
PatError::SaveOverflow => "save store overflow",
PatError::StackError => "stack unbalanced",
PatError::StackInvalid => "stack must follow jump",
PatError::UnclosedQuote => "string missing end quote",
PatError::AlignedOperand => "aligned operand error",
PatError::ReadOperand => "read operand error",
PatError::SubPattern => "sub pattern error",
PatError::SubOverflow => "sub pattern too large",
}
}
}
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Atom {
Byte(u8),
Save(u8),
Push(u8),
Pop,
Fuzzy(u8),
Skip(u8),
Back(u8),
Rangext(u8),
Many(u8),
Jump1,
Jump4,
Ptr,
Pir(u8),
VTypeName,
Check(u8),
Aligned(u8),
ReadI8(u8),
ReadU8(u8),
ReadI16(u8),
ReadU16(u8),
ReadI32(u8),
ReadU32(u8),
Zero(u8),
Case(u8),
Break(u8),
Nop,
}
pub type Pattern = Vec<Atom>;
pub fn save_len(pat: &[Atom]) -> usize {
pat.iter().filter_map(|&atom| {
match atom {
Atom::Save(slot) | Atom::Pir(slot) | Atom::Check(slot) | Atom::Zero(slot) |
Atom::ReadI8(slot) | Atom::ReadI16(slot) | Atom::ReadI32(slot) |
Atom::ReadU8(slot) | Atom::ReadU16(slot)| Atom::ReadU32(slot) => Some(slot as usize + 1),
_ => None,
}
}).max().unwrap_or(0)
}
pub fn parse(pat: &str) -> Result<Pattern, ParsePatError> {
let mut result = Vec::with_capacity(pat.len() / 2);
let mut pat_end = pat;
match parse_helper(&mut pat_end, &mut result) {
Ok(()) => Ok(result),
Err(kind) => {
let position = pat_end.as_ptr() as usize - pat.as_ptr() as usize;
Err(ParsePatError { kind, position })
},
}
}
fn parse_helper(pat: &mut &str, result: &mut Vec<Atom>) -> Result<(), PatError> {
result.push(Atom::Save(0));
let mut iter = pat.as_bytes().iter();
let mut save = 1;
let mut depth = 0;
#[derive(Default)]
struct SubPattern {
case: usize,
brks: Vec<usize>,
save: u8,
save_next: u8,
depth: u8,
}
let mut subs = Vec::<SubPattern>::new();
while let Some(mut chr) = iter.next().cloned() {
match chr {
b'%' => result.push(Atom::Jump1),
b'$' => result.push(Atom::Jump4),
b'*' => result.push(Atom::Ptr),
b'{' => {
depth += 1;
let atom = match result.last_mut() {
Some(atom @ Atom::Jump1) => mem::replace(atom, Atom::Push(1)),
Some(atom @ Atom::Jump4) => mem::replace(atom, Atom::Push(4)),
Some(atom @ Atom::Ptr) => mem::replace(atom, Atom::Push(PTR_SKIP)),
_ => return Err(PatError::StackInvalid),
};
result.push(atom);
},
b'}' => {
if depth <= 0 {
return Err(PatError::StackError);
}
depth -= 1;
result.push(Atom::Pop);
},
b'(' => {
subs.push(SubPattern::default());
let sub = subs.last_mut().unwrap();
sub.save = save;
sub.depth = depth;
sub.case = result.len();
result.push(Atom::Case(0));
},
b'|' => {
let sub = subs.last_mut().ok_or(PatError::SubPattern)?;
sub.save_next = cmp::max(sub.save_next, save);
save = sub.save;
depth = sub.depth;
sub.brks.push(result.len());
result.push(Atom::Break(0));
let case_offset = result.len() - sub.case - 1;
if case_offset >= 256 {
return Err(PatError::SubOverflow);
}
result[sub.case] = Atom::Case(case_offset as u8);
sub.case = result.len();
result.push(Atom::Case(0));
},
b')' => {
let sub = subs.pop().ok_or(PatError::SubPattern)?;
save = cmp::max(sub.save_next, save);
depth = sub.depth;
result[sub.case] = Atom::Nop;
for &brk in &sub.brks {
let brk_offset = result.len() - brk - 1;
if brk_offset >= 256 {
return Err(PatError::SubOverflow);
}
result[brk] = Atom::Break(brk_offset as u8);
}
},
b'[' => {
let mut lower_bound = 0u32;
let mut at_least_one_char = false;
loop {
chr = iter.next().cloned().ok_or(PatError::ManyInvalid)?;
match chr {
b'-' | b']' => break,
chr @ b'0'...b'9' => {
at_least_one_char = true;
lower_bound = lower_bound * 10 + (chr - b'0') as u32;
if lower_bound >= 16384 {
return Err(PatError::ManyOverflow);
}
},
_ => return Err(PatError::ManyInvalid),
}
}
if !at_least_one_char {
return Err(PatError::ManyInvalid);
}
if lower_bound > 0 {
if lower_bound >= 256 {
result.push(Atom::Rangext((lower_bound >> 8) as u8));
}
result.push(Atom::Skip((lower_bound & 0xff) as u8));
}
if chr == b']' {
continue;
}
let mut upper_bound = 0u32;
loop {
chr = iter.next().cloned().ok_or(PatError::ManyInvalid)?;
match chr {
b']' => break,
chr @ b'0'...b'9' => {
upper_bound = upper_bound * 10 + (chr - b'0') as u32;
if upper_bound >= 16384 {
return Err(PatError::ManyOverflow);
}
},
_ => return Err(PatError::ManyInvalid),
}
}
if lower_bound < upper_bound {
let many_skip = upper_bound - lower_bound;
if many_skip >= 256 {
result.push(Atom::Rangext((many_skip >> 8) as u8));
}
result.push(Atom::Many((many_skip & 0xff) as u8));
}
else {
return Err(PatError::ManyRange);
}
},
b'0'...b'9' | b'A'...b'F' | b'a'...b'f' => {
let hi = if chr >= b'a' { chr - b'a' + 10 }
else if chr >= b'A' { chr - b'A' + 10 }
else { chr - b'0' };
chr = iter.next().cloned().ok_or(PatError::UnpairedHexDigit)?;
let lo = if chr >= b'a' && chr <= b'f' { chr - b'a' + 10 }
else if chr >= b'A' && chr <= b'F' { chr - b'A' + 10 }
else if chr >= b'0' && chr <= b'9' { chr - b'0' }
else { return Err(PatError::UnpairedHexDigit); };
result.push(Atom::Byte((hi << 4) + lo));
},
b'"' => {
loop {
if let Some(chr) = iter.next().cloned() {
if chr != b'"' {
result.push(Atom::Byte(chr));
}
else {
break;
}
}
else {
return Err(PatError::UnclosedQuote);
}
}
},
b'\'' => {
if save >= u8::max_value() {
return Err(PatError::SaveOverflow);
}
result.push(Atom::Save(save));
save += 1;
},
b'?' => {
if let Some(Atom::Skip(skip)) = result.last_mut() {
if *skip != PTR_SKIP && *skip < 255u8 {
*skip += 1;
continue;
}
}
result.push(Atom::Skip(1));
},
b'@' => {
let op = iter.next().cloned().ok_or(PatError::AlignedOperand)?;
let atom = if op >= b'0' && op <= b'9' {
Atom::Aligned(op - b'0')
}
else if op >= b'A' && op <= b'Z' {
Atom::Aligned(10 + (op - b'A'))
}
else if op >= b'a' && op <= b'z' {
Atom::Aligned(10 + (op - b'a'))
}
else {
return Err(PatError::AlignedOperand);
};
result.push(atom);
},
b'i' => {
let atom = match iter.next().cloned() {
Some(b'1') => Atom::ReadI8(save),
Some(b'2') => Atom::ReadI16(save),
Some(b'4') => Atom::ReadI32(save),
_ => return Err(PatError::ReadOperand),
};
if save >= u8::max_value() {
return Err(PatError::SaveOverflow);
}
save += 1;
result.push(atom);
},
b'u' => {
let atom = match iter.next().cloned() {
Some(b'1') => Atom::ReadU8(save),
Some(b'2') => Atom::ReadU16(save),
Some(b'4') => Atom::ReadU32(save),
_ => return Err(PatError::ReadOperand),
};
if save >= u8::max_value() {
return Err(PatError::SaveOverflow);
}
save += 1;
result.push(atom);
},
b'z' => {
if save >= u8::max_value() {
return Err(PatError::SaveOverflow);
}
result.push(Atom::Zero(save));
save += 1;
},
b' ' | b'\n' | b'\r' | b'\t' => {},
_ => {
return Err(PatError::UnknownChar);
},
}
*pat = unsafe { str::from_utf8_unchecked(iter.as_slice()) };
}
if depth != 0 {
return Err(PatError::StackError);
}
if subs.len() != 0 {
return Err(PatError::SubPattern);
}
fn is_redundant(atom: &Atom) -> bool {
return match atom {
| Atom::Skip(_)
| Atom::Rangext(_)
| Atom::Pop
| Atom::Many(_) => true,
_ => false,
}
}
while result.last().map(is_redundant).unwrap_or(false) {
result.pop();
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
const _: [(); 2] = [(); std::mem::size_of::<Atom>()];
#[test]
fn patterns() {
use self::Atom::*;
assert_eq!(parse("12 34 56 ? ?"), Ok(vec![Save(0), Byte(0x12), Byte(0x34), Byte(0x56)]));
assert_eq!(parse("B9'?? 68???? E8${'} 8B"), Ok(vec![
Save(0), Byte(0xB9), Save(1), Skip(2), Byte(0x68), Skip(4), Byte(0xE8), Push(4), Jump4, Save(2), Pop, Byte(0x8B)
]));
assert_eq!(parse("${%{${%{}}}}"), Ok(vec![
Save(0), Push(4), Jump4, Push(1), Jump1, Push(4), Jump4, Push(1), Jump1
]));
assert_eq!(parse("24 5A9e D0 AFBea3 fCdd"), Ok(vec![
Save(0), Byte(0x24), Byte(0x5A), Byte(0x9E), Byte(0xD0), Byte(0xAF), Byte(0xBE), Byte(0xA3), Byte(0xFC), Byte(0xDD)
]));
assert_eq!(parse("\"string\""), Ok(vec![
Save(0), Byte(115), Byte(116), Byte(114), Byte(105), Byte(110), Byte(103)
]));
assert_eq!(parse("*{FF D8 42}"), Ok(vec![
Save(0), Push(PTR_SKIP), Ptr, Byte(0xFF), Byte(0xD8), Byte(0x42)
]));
assert_eq!(parse("*{\"hello\"00}"), Ok(vec![
Save(0), Push(PTR_SKIP), Ptr, Byte(104), Byte(101), Byte(108), Byte(108), Byte(111), Byte(0)
]));
assert_eq!(parse("b8 [16] 50 [13-42] ff"), Ok(vec![
Save(0), Byte(0xb8), Skip(16), Byte(0x50), Skip(13), Many(29), Byte(0xff)
]));
assert_eq!(parse("e9 $ @4"), Ok(vec![
Save(0), Byte(0xe9), Jump4, Aligned(4)
]));
assert_eq!(parse("83 c0 2a ( 6a ? | 68 ? ? ? ? ) e8"), Ok(vec![
Save(0), Byte(0x83), Byte(0xc0), Byte(0x2a),
Case(3), Byte(0x6a), Skip(1), Break(3),
Nop, Byte(0x68), Skip(4), Byte(0xe8),
]));
}
#[test]
fn errors() {
use self::PatError::*;
assert_eq!(Err(ParsePatError { kind: StackError, position: 2 }), parse("${"));
assert_eq!(Err(ParsePatError { kind: StackError, position: 0 }), parse("}}"));
assert_eq!(Err(ParsePatError { kind: StackInvalid, position: 3 }), parse("AB {}"));
assert_eq!(Err(ParsePatError { kind: UnpairedHexDigit, position: 2 }), parse("123"));
assert_eq!(Err(ParsePatError { kind: UnpairedHexDigit, position: 3 }), parse("EE BZ"));
assert_eq!(Err(ParsePatError { kind: UnpairedHexDigit, position: 0 }), parse("A?"));
assert_eq!(Err(ParsePatError { kind: UnknownChar, position: 0 }), parse("é"));
assert_eq!(Err(ParsePatError { kind: AlignedOperand, position: 0 }), parse("@"));
assert_eq!(Err(ParsePatError { kind: UnclosedQuote, position: 0 }), parse("\"unbalanced"));
assert_eq!(Err(ParsePatError { kind: ManyInvalid, position: 0 }), parse("[-2]"));
assert_eq!(Err(ParsePatError { kind: ManyInvalid, position: 0 }), parse("[-]"));
assert_eq!(Err(ParsePatError { kind: ManyInvalid, position: 0 }), parse("[]"));
assert_eq!(Err(ParsePatError { kind: ManyRange, position: 0 }), parse("[0-]"));
assert_eq!(Err(ParsePatError { kind: ManyRange, position: 0 }), parse("[0-0]"));
assert_eq!(Err(ParsePatError { kind: ManyRange, position: 0 }), parse("[20-1]"));
assert_eq!(Err(ParsePatError { kind: ManyOverflow, position: 0 }), parse("[20000-40000]"));
}
}