pub mod automata;
use crate::lexer::{Lexeme, Lexer};
use core::any::TypeId as Rule;
use automata::*;
use tinyvec::ArrayVec;
use std::collections::VecDeque;
use crate::error::{ParceError, ParsePhaseFailure, ParceErrorInfo};
use std::fmt::Debug;
use crate::error::ParsePhaseFailure::NothingToParse;
pub trait Parse<O: Parseable>: ToString {
fn parse_max(&self) -> Result<(O, ParseCompletion), ParceError>;
fn parse_all(&self) -> Result<O, ParceError>;
}
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ParseCompletion {
Complete,
Incomplete(usize)
}
pub trait Parseable: 'static + Sized {
type Lexer: Lexer;
const PRODUCTIONS: u32;
fn default_lexer() -> Box<Self::Lexer>;
fn commands(
rule: Rule,
route: u32,
state: u32,
lexeme: Lexeme<<Self::Lexer as Lexer>::Lexemes>
) -> ArrayVec<[AutomatonCommand; 3]>;
fn last_commands(rule: Rule, route: u32, state: u32) -> bool;
fn assemble(auto: Rawtomaton, lexemes: &[Lexeme<<Self::Lexer as Lexer>::Lexemes>], text: &str) -> Result<(usize, Self), ParceError>;
}
impl<I: ToString, O: Parseable> Parse<O> for I {
fn parse_max(&self) -> Result<(O, ParseCompletion), ParceError> {
let text = self.to_string();
let lexemes = O::default_lexer().lex(&text)?;
if lexemes.len() == 0 {
return Err(ParceError {
input: text,
start: 0,
info: ParceErrorInfo::Parse { failure: NothingToParse }
})
}
let army: Army = Army::new();
let mut alive: VecDeque<Rawtomaton> = VecDeque::new();
for i in 0..O::PRODUCTIONS {
alive.push_back(army.spawn(Rule::of::<O>(), i, 0).into());
}
let mut last = None;
let mut i = 0;
while !alive.is_empty() && i < lexemes.len() {
let mut j = 0;
while j < alive.len() {
let auto = alive[j];
unsafe {
let commands = O::commands((**auto).rule, (**auto).route, (**auto).state, lexemes[i]);
let result = army.command(auto, commands, i);
alive.extend(result.new_spawns);
j += result.reactivated.len();
for old in result.reactivated {
alive.push_front(old);
}
if let Some(vic) = result.victorious {
last = Some(vic);
}
if result.remove {
alive.remove(j);
} else if !result.fallthrough {
j += 1;
}
}
}
i += 1;
}
if i == lexemes.len() {
for auto in &alive {
unsafe {
if O::last_commands((***auto).rule, (***auto).route, (***auto).state) {
let result = army.command(*auto, tinyvec::array_vec!([AutomatonCommand; 3] => automata::AutomatonCommand::Victory), 0);
if let Some(vic) = result.victorious {
dbg!("hellO");
last = Some(vic);
}
}
}
}
}
if let Some(l) = last {
let (consumed, result) = O::assemble(l, lexemes.as_slice(), &text)?;
let completion = if consumed == lexemes.len() {
ParseCompletion::Complete
} else {
ParseCompletion::Incomplete(lexemes[consumed-1].start + lexemes[consumed-1].len)
};
Ok((result, completion))
} else {
Err(ParceError {
start: if alive.len() == 0 {
if i > 1 {
lexemes[i-1].start
} else {
0
}
} else if i > 0 {
text.len()
} else {
0
},
input: text,
info: ParceErrorInfo::parse(
if alive.len() == 0 {
ParsePhaseFailure::NoMatches
} else {
ParsePhaseFailure::InputEndedTooSoon
}
)
})
}
}
fn parse_all(&self) -> Result<O, ParceError> {
let (result, completion) = self.parse_max()?;
match completion {
ParseCompletion::Complete => Ok(result),
ParseCompletion::Incomplete(n) => Err(ParceError {
input: self.to_string(),
start: n+1,
info: ParceErrorInfo::parse(ParsePhaseFailure::LeftoverLexemes)
})
}
}
}
#[cfg(test)]
mod tests {
use crate as parce;
use parce::prelude::*;
macro_rules! parser_error {
($input:literal $start:literal $error:ident) => {
Err(parce::error::ParceError {
input: $input.to_string(),
start: $start,
info: parce::error::ParceErrorInfo::parse(parce::error::ParsePhaseFailure::$error)
})
};
}
macro_rules! pass {
($str:literal $result:expr) => {
assert_eq!($str.parse(), Ok($result))
}
}
macro_rules! fail {
($str:literal $grammar:ident $where:literal $error:ident) => {
assert_eq!($str.parse() as Result<$grammar,_>, parser_error!($str $where $error))
}
}
#[lexer(MyLexer)]
enum MyLexeme {
A = 'a',
B = 'b',
C = 'c',
D = 'd',
E = 'e',
F = 'f',
G = 'g',
Digit = "[0-9]",
Period = '.',
Bool = "'true' | 'false'",
#[skip] WhiteSpace = "[ \t\n\r]"
}
#[parser(MyLexer)]
enum BasicGrammar {
Thing = "A B C"
}
#[test]
fn basic() {
pass!("a b c" BasicGrammar::Thing);
fail!("a b" BasicGrammar 3 InputEndedTooSoon);
}
#[parser(MyLexer)]
enum OrGrammar {
Or = "A (B | B C) A"
}
#[test]
fn or() {
pass!("abca" OrGrammar::Or);
pass!("aba" OrGrammar::Or);
fail!("aa" OrGrammar 1 NoMatches);
fail!("a bbc a" OrGrammar 3 NoMatches);
}
#[parser(MyLexer)]
enum StarGrammar {
Star = "(A B C)*"
}
#[test]
fn star() {
pass!("abc abc" StarGrammar::Star);
assert_eq!("abc a".parse_max(), Ok((StarGrammar::Star, ParseCompletion::Incomplete(3))));
fail!("abc a" StarGrammar 4 LeftoverLexemes);
}
#[parser(MyLexer)]
enum OperatorGrammar {
Plus = "A (B C)+ A",
Question = "B (A C)? B",
FixedRange = "C (B A){3} C",
InfiniteRange = "D (A B){2,} D",
LimitedRange = "E (A B){2,4} E"
}
#[test]
fn question() {
pass!("b b" OperatorGrammar::Question);
pass!("b ac b" OperatorGrammar::Question);
fail!("b a b" OperatorGrammar 4 NoMatches);
}
#[test]
fn plus() {
pass!("a bc a" OperatorGrammar::Plus);
pass!("a bcbcbc a" OperatorGrammar::Plus);
fail!("a bcb a" OperatorGrammar 6 NoMatches);
fail!("aa" OperatorGrammar 1 NoMatches);
}
#[test]
fn range() {
pass!("c bababa c" OperatorGrammar::FixedRange);
fail!("c baba c" OperatorGrammar 7 NoMatches);
fail!("c babababa c" OperatorGrammar 8 NoMatches);
pass!("d abab d" OperatorGrammar::InfiniteRange);
pass!("d ababab d" OperatorGrammar::InfiniteRange);
fail!("d ab" OperatorGrammar 4 InputEndedTooSoon);
fail!("d ab d" OperatorGrammar 5 NoMatches);
pass!("e abab e" OperatorGrammar::LimitedRange);
pass!("e ababab e" OperatorGrammar::LimitedRange);
pass!("e abababab e" OperatorGrammar::LimitedRange);
fail!("e ab e" OperatorGrammar 5 NoMatches);
fail!("e ababababab e" OperatorGrammar 10 NoMatches);
}
#[parser(MyLexer)]
enum DelegateGrammar {
Start = "A #OrGrammar A"
}
#[test]
fn other_rule() {
pass!("a aba a" DelegateGrammar::Start);
pass!("a abca a" DelegateGrammar::Start);
}
#[parser(MyLexer)]
enum DotGrammar {
Dot = "A .* C"
}
#[test]
fn dot_and_greedy() {
pass!("a b c" DotGrammar::Dot);
pass!("a babcabcbacbac cc" DotGrammar::Dot)
}
#[parser(MyLexer)]
enum EndBehaviorGrammar {
Star = "A B*",
Question = "B A C?", Plus = "C A B+",
FixedRange = "D A{2}",
InfiniteRange = "E A{2,}",
LimitedRange = "F A{2,3}"
}
#[test]
fn end_behavior() {
fail!("" EndBehaviorGrammar 0 NothingToParse);
pass!("ab" EndBehaviorGrammar::Star);
pass!("ba" EndBehaviorGrammar::Question);
fail!("ca" EndBehaviorGrammar 2 InputEndedTooSoon);
fail!("d" EndBehaviorGrammar 1 InputEndedTooSoon);
fail!("da" EndBehaviorGrammar 2 InputEndedTooSoon);
pass!("daa" EndBehaviorGrammar::FixedRange);
fail!("daaa" EndBehaviorGrammar 4 LeftoverLexemes);
fail!("e" EndBehaviorGrammar 1 InputEndedTooSoon);
fail!("ea" EndBehaviorGrammar 2 InputEndedTooSoon);
pass!("eaa" EndBehaviorGrammar::InfiniteRange);
pass!("eaaa" EndBehaviorGrammar::InfiniteRange);
fail!("f" EndBehaviorGrammar 1 InputEndedTooSoon);
fail!("fa" EndBehaviorGrammar 2 InputEndedTooSoon);
pass!("faa" EndBehaviorGrammar::LimitedRange);
pass!("faaa" EndBehaviorGrammar::LimitedRange);
fail!("faaaa" EndBehaviorGrammar 5 LeftoverLexemes);
}
#[parser(MyLexer)]
enum NestingGrammar {
Or = "A (B | C (A A | B B)) D",
And = "B C (D E)",
Star = "C (A B C*)*"
}
#[test]
fn or_nesting() {
pass!("abd" NestingGrammar::Or);
pass!("acaad" NestingGrammar::Or);
pass!("acbbd" NestingGrammar::Or);
fail!("abaad" NestingGrammar 2 NoMatches);
fail!("abbbd" NestingGrammar 2 NoMatches);
fail!("acd" NestingGrammar 2 NoMatches);
}
#[test]
fn and_nesting() {
pass!("bcde" NestingGrammar::And);
fail!("bc" NestingGrammar 2 InputEndedTooSoon);
}
#[test]
fn star_nesting() {
pass!("c" NestingGrammar::Star);
pass!("c abc" NestingGrammar::Star);
pass!("c ab ab" NestingGrammar::Star);
pass!("c abc ab abcccc" NestingGrammar::Star);
}
#[parser(MyLexer)]
enum BareUnnamedGrammar {
Basic(BasicGrammar) = "A 0 A",
StarVec(Vec<BasicGrammar>) = "B (C 0)*",
QuestionOption(Option<BasicGrammar>) = "C 0?",
PlusVec(Vec<BasicGrammar>) = "D (A 0)+",
RangeVec(Vec<BasicGrammar>) = "E 0{2,4}",
NestedVecOption(Vec<Option<BasicGrammar>>) = "F (D 0?)+",
Multiple(Option<BasicGrammar>, Vec<OrGrammar>) = "G 0? A 1+",
Boxed(Option<Box<BareUnnamedGrammar>>) = "B A 0?"
}
#[test]
fn bare_unnamed_grammar() {
pass!("a abc a" BareUnnamedGrammar::Basic(BasicGrammar::Thing));
pass!("b cabc cabc cabc" BareUnnamedGrammar::StarVec(vec![BasicGrammar::Thing, BasicGrammar::Thing, BasicGrammar::Thing]));
pass!("b" BareUnnamedGrammar::StarVec(vec![]));
pass!("c abc" BareUnnamedGrammar::QuestionOption(Some(BasicGrammar::Thing)));
pass!("c" BareUnnamedGrammar::QuestionOption(None));
pass!("d aabc aabc aabc" BareUnnamedGrammar::PlusVec(vec![BasicGrammar::Thing, BasicGrammar::Thing, BasicGrammar::Thing]));
pass!("d aabc" BareUnnamedGrammar::PlusVec(vec![BasicGrammar::Thing]));
pass!("e abc abc" BareUnnamedGrammar::RangeVec(vec![BasicGrammar::Thing, BasicGrammar::Thing]));
pass!("f dabc dd dabc" BareUnnamedGrammar::NestedVecOption(vec![Some(BasicGrammar::Thing), None, None, Some(BasicGrammar::Thing)]));
pass!("g abc a abca" BareUnnamedGrammar::Multiple(Some(BasicGrammar::Thing), vec![OrGrammar::Or]));
pass!("ba daabc" BareUnnamedGrammar::Boxed(Some(Box::new(BareUnnamedGrammar::PlusVec(vec![BasicGrammar::Thing])))));
}
#[parser(MyLexer)]
enum BareNamedGrammar {
Basic {hello: BasicGrammar} = "A hello A",
StarVec {there: Vec<BasicGrammar>} = "B (C there)*",
QuestionOption{general: Option<BasicGrammar>} = "C general?",
PlusVec {kenobi: Vec<BasicGrammar>} = "D (A kenobi)+",
RangeVec {some_body: Vec<BasicGrammar>} = "E some_body{2,4}", NestedVecOption {once: Vec<Option<BasicGrammar>>} = "F (D once?)+",
Multiple {told: Option<BasicGrammar>, me: Vec<OrGrammar>} = "G told? A me+",
Boxed {the: Option<Box<BareNamedGrammar>>} = "B A the?"
}
#[test]
fn bare_named_grammar() {
pass!("a abc a" BareNamedGrammar::Basic {hello: BasicGrammar::Thing});
pass!("b cabc cabc cabc" BareNamedGrammar::StarVec {there: vec![BasicGrammar::Thing, BasicGrammar::Thing, BasicGrammar::Thing]});
pass!("b" BareNamedGrammar::StarVec {there: vec![]});
pass!("c abc" BareNamedGrammar::QuestionOption {general: Some(BasicGrammar::Thing)});
pass!("c" BareNamedGrammar::QuestionOption {general: None});
pass!("d aabc aabc aabc" BareNamedGrammar::PlusVec {kenobi: vec![BasicGrammar::Thing, BasicGrammar::Thing, BasicGrammar::Thing]});
pass!("d aabc" BareNamedGrammar::PlusVec {kenobi: vec![BasicGrammar::Thing]});
pass!("e abc abc" BareNamedGrammar::RangeVec {some_body: vec![BasicGrammar::Thing, BasicGrammar::Thing]});
pass!("f dabc dd dabc" BareNamedGrammar::NestedVecOption {once: vec![Some(BasicGrammar::Thing), None, None, Some(BasicGrammar::Thing)]});
pass!("g abc a abca" BareNamedGrammar::Multiple {told: Some(BasicGrammar::Thing), me: vec![OrGrammar::Or]});
pass!("ba daabc" BareNamedGrammar::Boxed {the: Some(Box::new(BareNamedGrammar::PlusVec {kenobi: vec![BasicGrammar::Thing]}))});
}
#[parser(MyLexer)]
enum AssignGrammar {
String(String) = "A 0=(B C D)",
Pass(String, BasicGrammar) = "B 0=1",
Number(f32, u64) = "C 0=(Digit+ Period Digit+) A 1=Digit+",
Bool {maybe: Vec<bool>} = "D (maybe=Bool)+"
}
#[test]
fn assigned_grammar() {
pass!("a b cd" AssignGrammar::String("b cd".to_string()));
pass!("b a b c" AssignGrammar::Pass("a b c".to_string(), BasicGrammar::Thing));
pass!("c 3.145 a 1105" AssignGrammar::Number(3.145, 1105));
pass!("d false" AssignGrammar::Bool {maybe: vec![false]});
pass!("d true false false" AssignGrammar::Bool {maybe: vec![true, false, false]});
}
}