#![deny(
missing_docs,
trivial_casts,
trivial_numeric_casts,
unsafe_code,
unused_import_braces,
unused_qualifications
)]
#![feature(test)]
#![no_std]
extern crate test;
use smallvec::SmallVec;
type SVec<A> = SmallVec<[A; 16]>;
#[derive(Clone, Copy, Debug, Hash, PartialEq)]
pub enum Data<'a> {
Atom(&'a str),
Command(&'a str),
}
impl<'a> Data<'a> {
pub fn content(&self) -> &'a str {
match *self {
Data::Atom(string) => string,
Data::Command(string) => string,
}
}
}
#[derive(Debug, PartialEq)]
pub enum ParseError {
DanglingLeftParenthesis,
PrematureRightParenthesis,
NothingToParse,
}
pub trait Evaluate<T> {
fn evaluate<'a>(&mut self, statement: &[Data<'a>]) -> T;
fn interpret_single(&mut self, statement: &str) -> Result<T, ParseError> {
let mut data = SVec::<_>::new();
parse(statement, &mut data)?;
Ok(self.evaluate(&data[..]))
}
fn interpret_multiple(&mut self, code: &str) -> Result<T, ParseError> {
let mut old_idx = 0;
let mut lparen_stack = 0;
let mut result = Err(ParseError::NothingToParse);
let mut idx = 0;
let mut seen_non_ws = false;
for ch in code.chars() {
if ch == '\n' && lparen_stack == 0 && seen_non_ws {
seen_non_ws = false;
result = Ok(self.interpret_single(&code[old_idx..idx])?);
old_idx = idx + 1;
} else if ch == '(' {
lparen_stack += 1;
} else if ch == ')' {
if lparen_stack == 0 {
return Err(ParseError::PrematureRightParenthesis);
}
lparen_stack -= 1;
} else if !ch.is_whitespace() {
seen_non_ws = true;
}
idx += ch.len_utf8();
}
if idx != old_idx && seen_non_ws {
result = Ok(self.interpret_single(&code[old_idx..idx])?);
}
result
}
}
#[derive(Debug, Default, PartialEq)]
pub struct PartialParse {
lparen_stack: usize,
has_encountered_rparen: bool,
}
#[derive(Debug, PartialEq)]
pub enum PartialParseOp {
Ready,
Unready,
Discard,
}
impl PartialParse {
pub fn parse_increment(&mut self, input: u8) -> PartialParseOp {
if input == b'\n' && self.lparen_stack == 0 {
self.has_encountered_rparen = false;
return PartialParseOp::Ready;
} else if input == b'(' {
self.lparen_stack += 1;
} else if input == b')' {
if self.lparen_stack == 0 {
self.has_encountered_rparen = true;
return PartialParseOp::Discard;
}
self.lparen_stack -= 1;
}
if self.has_encountered_rparen {
PartialParseOp::Discard
} else {
PartialParseOp::Unready
}
}
}
fn parse<'a>(line: &'a str, output: &mut SVec<Data<'a>>) -> Result<(), ParseError> {
let mut lparen_stack = 0;
let (mut start, mut stop) = (0, 0);
for ch in line.chars() {
if lparen_stack > 0 {
if ch == '(' {
lparen_stack += 1;
stop += ch.len_utf8();
} else if ch == ')' {
lparen_stack -= 1;
if lparen_stack == 0 {
output.push(Data::Command(&line[start..stop]));
stop += ch.len_utf8();
start = stop;
} else {
stop += ch.len_utf8();
}
} else {
stop += ch.len_utf8();
}
} else if ch.is_whitespace() {
if start != stop {
output.push(Data::Atom(&line[start..stop]));
}
stop += ch.len_utf8();
start = stop;
} else if ch == '(' {
lparen_stack += 1;
if start != stop {
output.push(Data::Atom(&line[start..stop]));
}
stop += ch.len_utf8();
start = stop;
} else if ch == ')' {
return Err(ParseError::PrematureRightParenthesis);
} else {
stop += ch.len_utf8();
}
}
if lparen_stack > 0 {
return Err(ParseError::DanglingLeftParenthesis);
}
if start != stop {
output.push(Data::Atom(&line[start..stop]));
}
if output.is_empty() {
Err(ParseError::NothingToParse)
} else {
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use test::{black_box, Bencher};
#[test]
fn empty_parse() {
let line = "";
let mut data = SVec::<Data>::new();
assert_eq!(Err(ParseError::NothingToParse), parse(line, &mut data));
assert_eq!(true, data.is_empty());
}
#[test]
fn basic_parse() {
let line = "Set Log Level 0";
let mut data = SVec::<Data>::new();
parse(line, &mut data).unwrap();
assert_eq!(4, data.len());
assert_eq!(Data::Atom("Set"), data[0]);
assert_eq!(Data::Atom("Log"), data[1]);
assert_eq!(Data::Atom("Level"), data[2]);
assert_eq!(Data::Atom("0"), data[3]);
}
#[test]
fn parse_weird_whitespace() {
let line = "Set Log\n\n\n Level ( 0)";
let mut data = SVec::<Data>::new();
parse(line, &mut data).unwrap();
assert_eq!(4, data.len());
assert_eq!(Data::Atom("Set"), data[0]);
assert_eq!(Data::Atom("Log"), data[1]);
assert_eq!(Data::Atom("Level"), data[2]);
assert_eq!(Data::Command(" 0"), data[3]);
}
#[test]
fn parse_unicode() {
let line = "2";
let mut data = SVec::<Data>::new();
parse(line, &mut data).unwrap();
assert_eq!(1, data.len());
}
#[test]
fn empty_subcommand_parse() {
let line = "()";
let mut data = SVec::<Data>::new();
parse(line, &mut data).unwrap();
assert_eq!(1, data.len());
assert_eq!(Data::Command(""), data[0]);
}
#[test]
fn empty_nested_subcommand_parse() {
let line = "(())";
let mut data = SVec::<Data>::new();
parse(line, &mut data).unwrap();
assert_eq!(1, data.len());
assert_eq!(Data::Command("()"), data[0]);
}
#[test]
fn empty_nested_subcommand_with_more_empty_parse() {
let line = "(())()";
let mut data = SVec::<Data>::new();
parse(line, &mut data).unwrap();
assert_eq!(2, data.len());
assert_eq!(Data::Command("()"), data[0]);
assert_eq!(Data::Command(""), data[1]);
}
#[test]
fn subcommand_parse() {
let line = "Set Log Level (Get Log Level)";
let mut data = SVec::<Data>::new();
parse(line, &mut data).unwrap();
assert_eq!(4, data.len());
assert_eq!(Data::Atom("Set"), data[0]);
assert_eq!(Data::Atom("Log"), data[1]);
assert_eq!(Data::Atom("Level"), data[2]);
assert_eq!(Data::Command("Get Log Level"), data[3]);
}
#[test]
fn subcommand_parse_multiline() {
let line = "Set Log Level (\n\tGet Logger Levels\n)";
let mut data = SVec::<Data>::new();
parse(line, &mut data).unwrap();
assert_eq!(4, data.len());
assert_eq!(Data::Atom("Set"), data[0]);
assert_eq!(Data::Atom("Log"), data[1]);
assert_eq!(Data::Atom("Level"), data[2]);
assert_eq!(Data::Command("\n\tGet Logger Levels\n"), data[3]);
let mut new_data = SVec::<Data>::new();
parse(data[3].content(), &mut new_data).unwrap();
assert_eq!(3, new_data.len());
assert_eq!(Data::Atom("Get"), new_data[0]);
assert_eq!(Data::Atom("Logger"), new_data[1]);
assert_eq!(Data::Atom("Levels"), new_data[2]);
}
#[test]
fn fail_parse_too_long() {
let line = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse viverra porta lacus, quis pretium nibh lacinia at. Mauris convallis sed lectus nec dapibus. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nulla vulputate sapien dui. Aliquam finibus ante ut purus facilisis, in sagittis tortor varius. Nunc interdum fermentum libero, et egestas arcu convallis sed. Maecenas nec diam a libero vulputate suscipit. Phasellus ac dolor ut nunc ultricies fringilla. Maecenas sed feugiat nunc. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae. Quisque tincidunt metus ut ante dapibus, et molestie massa varius. Sed ultrices sapien sed mauris congue pretium. Pellentesque bibendum hendrerit sagittis. Vestibulum dignissim egestas feugiat. Ut porttitor et massa a posuere. Ut euismod metus a sem facilisis ullamcorper. Proin pharetra placerat enim";
let mut data = SVec::<_>::new();
parse(line, &mut data).unwrap();
}
#[test]
fn fail_parse_closing_parenthesis() {
let line = "command ) will not work";
let mut data = SVec::<_>::new();
assert_eq!(
ParseError::PrematureRightParenthesis,
parse(line, &mut data).unwrap_err()
);
}
#[test]
fn fail_parse_dangling_open_parenthesis() {
let line = "command ( will not work";
let mut data = SVec::<_>::new();
assert_eq!(
ParseError::DanglingLeftParenthesis,
parse(line, &mut data).unwrap_err()
);
}
#[test]
fn parse_touching_nested_call() {
let line = "call(call)";
let mut data = SVec::<_>::new();
parse(line, &mut data).unwrap();
assert_eq!(2, data.len());
assert_eq!(&[Data::Atom("call"), Data::Command("call")], &data[0..2]);
}
#[test]
fn interpret_empty() {
struct Eval {
pub invoked: usize,
}
impl Evaluate<()> for Eval {
fn evaluate<'a>(&mut self, _: &[Data<'a>]) {
self.invoked += 1;
}
}
let mut eval = Eval { invoked: 0 };
let line = "";
assert_eq!(Err(ParseError::NothingToParse), eval.interpret_single(line));
assert_eq!(0, eval.invoked);
eval.interpret_multiple("command").unwrap();
assert_eq!(1, eval.invoked);
}
#[test]
fn interpret_whitespace() {
struct Eval {
pub invoked: usize,
}
impl Evaluate<()> for Eval {
fn evaluate<'a>(&mut self, _: &[Data<'a>]) {
panic!["Should never be invoked"]
}
}
let mut eval = Eval { invoked: 0 };
let line = " ";
assert_eq!(Err(ParseError::NothingToParse), eval.interpret_single(line));
assert_eq!(
Err(ParseError::NothingToParse),
eval.interpret_multiple(line)
);
assert_eq!(
Err(ParseError::NothingToParse),
eval.interpret_multiple(" \n")
);
}
#[test]
fn interpret_unicode() {
struct Eval {
pub invoked: usize,
}
impl Evaluate<()> for Eval {
fn evaluate<'a>(&mut self, _: &[Data<'a>]) {
self.invoked += 1;
}
}
let mut eval = Eval { invoked: 0 };
let line = "2";
eval.interpret_single(line).unwrap();
eval.interpret_multiple(line).unwrap();
assert_eq!(2, eval.invoked);
}
#[test]
fn interpret_multiple_simple() {
struct Eval {
pub invoked: usize,
}
impl Evaluate<()> for Eval {
fn evaluate<'a>(&mut self, _: &[Data<'a>]) {
self.invoked += 1;
}
}
let mut eval = Eval { invoked: 0 };
eval.interpret_multiple("X\nY\nZ\nW (\n\t1 2 3\n) W-1\nQ")
.unwrap();
assert_eq!(5, eval.invoked);
}
#[test]
fn interpret_multiple() {
struct Eval {
pub invoked: usize,
}
impl Evaluate<()> for Eval {
fn evaluate<'a>(&mut self, commands: &[Data<'a>]) {
self.invoked += 1;
match self.invoked {
1 => {
assert_eq!(
&[
Data::Atom("Lorem"),
Data::Atom("ipsum"),
Data::Command("\n\tdolor sit amet\n\tX\n")
],
commands
);
}
2 => {
assert_eq!(
&[
Data::Atom("dolor"),
Data::Atom("sit"),
Data::Atom("amet"),
Data::Atom("X")
],
commands
);
}
3 => {
assert_eq!(&[Data::Atom("Singular")], commands);
}
_ => assert![false],
}
for command in commands {
match command {
Data::Atom(_) => {}
Data::Command(string) => {
self.interpret_single(string).unwrap();
}
}
}
}
}
let mut eval = Eval { invoked: 0 };
eval.interpret_multiple("Lorem ipsum (\n\tdolor sit amet\n\tX\n)\nSingular")
.unwrap();
assert_eq!(3, eval.invoked);
}
#[test]
fn evaluator() {
struct Eval {
pub invoked: usize,
}
impl Evaluate<()> for Eval {
fn evaluate<'a>(&mut self, _: &[Data<'a>]) {
self.invoked += 1;
}
}
let mut eval = Eval { invoked: 0 };
eval.interpret_single("Hello World").unwrap();
assert_eq!(1, eval.invoked);
eval.interpret_single("This is an example (command)")
.unwrap();
assert_eq!(2, eval.invoked);
}
#[test]
fn recursive_evaluator() {
struct Eval {
pub invoked: usize,
}
impl Evaluate<()> for Eval {
fn evaluate<'a>(&mut self, commands: &[Data<'a>]) {
self.invoked += 1;
for command in commands {
match command {
Data::Atom(_) => {}
Data::Command(string) => {
self.interpret_single(string).unwrap();
}
}
}
}
}
let mut eval = Eval { invoked: 0 };
eval.interpret_single("Hello World").unwrap();
assert_eq!(1, eval.invoked);
eval.interpret_single("This is an example of substitution: (command)")
.unwrap();
assert_eq!(3, eval.invoked);
eval.interpret_single(
"We can substitute more than once: (my command), anywhere: (another command here)",
)
.unwrap();
assert_eq!(6, eval.invoked);
eval.interpret_single("We can also nest substitutions: (my (recursive (command) here))")
.unwrap();
assert_eq!(10, eval.invoked);
eval.interpret_single("a (\n\tb c\n)").unwrap();
assert_eq!(12, eval.invoked);
}
#[test]
fn partial_parse_single_line() {
let mut part = PartialParse::default();
for ch in "hello world".bytes() {
assert_eq!(PartialParseOp::Unready, part.parse_increment(ch));
}
assert_eq!(PartialParseOp::Ready, part.parse_increment(b'\n'));
}
#[test]
fn partial_parse_multi_line() {
let mut part = PartialParse::default();
for ch in "hello world (".bytes() {
assert_eq!(PartialParseOp::Unready, part.parse_increment(ch));
}
assert_eq!(PartialParseOp::Unready, part.parse_increment(b'\n'));
for ch in "this is a message".bytes() {
assert_eq!(PartialParseOp::Unready, part.parse_increment(ch));
}
assert_eq!(PartialParseOp::Unready, part.parse_increment(b'\n'));
assert_eq!(PartialParseOp::Unready, part.parse_increment(b')'));
for ch in "last few words".bytes() {
assert_eq!(PartialParseOp::Unready, part.parse_increment(ch));
}
assert_eq!(PartialParseOp::Ready, part.parse_increment(b'\n'));
}
#[test]
fn partial_parse_multi_line_nested() {
let mut part = PartialParse::default();
for ch in "hello world (".bytes() {
assert_eq!(PartialParseOp::Unready, part.parse_increment(ch));
}
assert_eq!(PartialParseOp::Unready, part.parse_increment(b'\n'));
for ch in "this) (is (a message".bytes() {
assert_eq!(PartialParseOp::Unready, part.parse_increment(ch));
}
assert_eq!(PartialParseOp::Unready, part.parse_increment(b'\n'));
assert_eq!(PartialParseOp::Unready, part.parse_increment(b')'));
for ch in "last few words".bytes() {
assert_eq!(PartialParseOp::Unready, part.parse_increment(ch));
}
assert_eq!(PartialParseOp::Unready, part.parse_increment(b'\n'));
assert_eq!(PartialParseOp::Unready, part.parse_increment(b')'));
assert_eq!(PartialParseOp::Ready, part.parse_increment(b'\n'));
}
#[test]
fn partial_parse_error() {
let mut part = PartialParse::default();
for ch in "hello world".bytes() {
assert_eq!(PartialParseOp::Unready, part.parse_increment(ch));
}
assert_eq!(PartialParseOp::Discard, part.parse_increment(b')'));
assert_eq!(PartialParseOp::Ready, part.parse_increment(b'\n'));
}
#[test]
fn partial_parse_error_complex() {
let mut part = PartialParse::default();
for ch in "hello world (\na b c) d ".bytes() {
assert_eq!(PartialParseOp::Unready, part.parse_increment(ch));
}
assert_eq!(PartialParseOp::Discard, part.parse_increment(b')'));
for ch in "opener (\na b c d\ne f".bytes() {
assert_eq!(PartialParseOp::Discard, part.parse_increment(ch));
}
assert_eq!(PartialParseOp::Discard, part.parse_increment(b')'));
assert_eq!(PartialParseOp::Ready, part.parse_increment(b'\n'));
}
#[test]
fn premature_right_parentheses_discards_entire_line() {
let mut part = PartialParse::default();
for ch in "hello world (\na b c) d ".bytes() {
assert_eq!(PartialParseOp::Unready, part.parse_increment(ch));
}
assert_eq!(PartialParseOp::Discard, part.parse_increment(b')'));
for ch in "opener (\na b c d\ne f".bytes() {
assert_eq!(PartialParseOp::Discard, part.parse_increment(ch));
}
assert_eq!(PartialParseOp::Discard, part.parse_increment(b'\n'));
assert_eq!(PartialParseOp::Discard, part.parse_increment(b'a'));
assert_eq!(PartialParseOp::Discard, part.parse_increment(b'('));
assert_eq!(PartialParseOp::Discard, part.parse_increment(b'\n'));
assert_eq!(PartialParseOp::Discard, part.parse_increment(b'x'));
assert_eq!(PartialParseOp::Discard, part.parse_increment(b'\n'));
assert_eq!(PartialParseOp::Discard, part.parse_increment(b'd'));
assert_eq!(PartialParseOp::Discard, part.parse_increment(b')'));
assert_eq!(PartialParseOp::Discard, part.parse_increment(b')'));
assert_eq!(PartialParseOp::Ready, part.parse_increment(b'\n'));
assert_eq!(PartialParseOp::Unready, part.parse_increment(b'x'));
}
#[bench]
fn empty_evaluate(b: &mut Bencher) {
struct Eval {}
impl Evaluate<()> for Eval {
fn evaluate<'a>(&mut self, _: &[Data<'a>]) {}
}
let mut eval = Eval {};
b.iter(|| {
eval.interpret_single(black_box("unknown reasonably long command"))
.unwrap();
});
}
#[bench]
fn empty_evaluate_very_short(b: &mut Bencher) {
struct Eval {}
impl Evaluate<()> for Eval {
fn evaluate<'a>(&mut self, _: &[Data<'a>]) {}
}
let mut eval = Eval {};
b.iter(|| {
eval.interpret_single(black_box("x")).unwrap();
});
}
#[bench]
fn empty_evaluate_very_long(b: &mut Bencher) {
struct Eval {}
impl Evaluate<()> for Eval {
fn evaluate<'a>(&mut self, _: &[Data<'a>]) {}
}
let mut eval = Eval {};
b.iter(|| {
eval.interpret_single(black_box("Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris tristique massa magna, eget consectetur dui posuere congue. Etiam rhoncus porttitor enim, eget malesuada ante dapibus eget. Duis neque dui, tincidunt ut varius")).unwrap();
});
}
#[bench]
fn empty_evaluate_with_subsistution(b: &mut Bencher) {
struct Eval {}
impl Evaluate<()> for Eval {
fn evaluate<'a>(&mut self, _: &[Data<'a>]) {}
}
let mut eval = Eval {};
b.iter(|| {
eval.interpret_single(black_box("unknown (some) (long command 1)"))
.unwrap();
});
}
#[bench]
fn increment_evaluate(b: &mut Bencher) {
struct Eval {
pub invoke: usize,
}
impl Evaluate<()> for Eval {
fn evaluate<'a>(&mut self, _: &[Data<'a>]) {
self.invoke += 1;
}
}
let mut eval = Eval { invoke: 0 };
b.iter(|| {
eval.interpret_single(black_box("unknown reasonably long command"))
.unwrap();
});
}
#[bench]
fn parse_very_long(b: &mut Bencher) {
let line = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris tristique massa magna, eget consectetur dui posuere congue. Etiam rhoncus porttitor enim, eget malesuada ante dapibus eget. Duis neque dui, tincidunt ut varius";
b.iter(|| {
let mut data = SVec::<_>::new();
parse(black_box(line), &mut data).unwrap();
});
}
#[bench]
fn iterate_very_long(b: &mut Bencher) {
let line = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris tristique massa magna, eget consectetur dui posuere congue. Etiam rhoncus porttitor enim, eget malesuada ante dapibus eget. Duis neque dui, tincidunt ut varius";
b.iter(|| {
let mut count = 0;
for _ in black_box(line).chars() {
count += 1;
}
assert_eq!(223, count);
});
}
}