extern crate alloc;
extern crate nom;
extern crate parse_hyperlinks;
use std::{iter::zip, mem};
use expression::{Expression, ExpressionWithID};
pub use grammar::Grammar;
use node::NodeWithID;
pub use node::{Node, RegexExtKind, SymbolKind};
use string_interner::{backend::StringBackend, symbol::SymbolU32, StringInterner};
pub mod config;
mod expression;
pub mod grammar;
pub mod node;
mod parser;
pub mod regex;
pub mod semantic_error;
pub mod simplified_grammar;
pub mod suffix_automaton;
pub mod utils;
pub mod validated_grammar;
#[derive(Debug, Clone)]
pub struct InternedStrings {
pub nonterminals: StringInterner<StringBackend<SymbolU32>>,
pub terminals: StringInterner<StringBackend<SymbolU32>>,
pub regex_strings: StringInterner<StringBackend<SymbolU32>>,
pub sub_strings: StringInterner<StringBackend<SymbolU32>>,
}
pub fn get_grammar(input: &str) -> Result<Grammar, nom::Err<nom::error::VerboseError<&str>>> {
let (interned_strings, expressions) = intern_strings(parser::parse_expressions(input)?.1);
Ok(Grammar {
interned_strings,
expressions,
})
}
fn intern_strings(expressions: Vec<Expression>) -> (InternedStrings, Vec<ExpressionWithID>) {
let mut nonterminals = StringInterner::<StringBackend<SymbolU32>>::new();
let mut terminals = StringInterner::<StringBackend<SymbolU32>>::new();
let mut regex_strings = StringInterner::<StringBackend<SymbolU32>>::new();
let mut sub_strings = StringInterner::<StringBackend<SymbolU32>>::new();
let mut new_expressions = vec![];
for expression in expressions {
let lhs = nonterminals.get_or_intern(expression.lhs);
let mut rhs = NodeWithID::Unknown;
let node = expression.rhs;
let mut stack = vec![(node, &mut rhs)];
while let Some((mut node, parent)) = stack.pop() {
match &mut node {
Node::Terminal(terminal) => {
let terminal = std::mem::take(terminal);
*parent = NodeWithID::Terminal(terminals.get_or_intern(&terminal));
}
Node::RegexString(regex_string) => {
let regex_string = std::mem::take(regex_string);
*parent = NodeWithID::RegexString(regex_strings.get_or_intern(®ex_string));
}
Node::EarlyEndRegexString(regex_string) => {
let regex_string = std::mem::take(regex_string);
*parent =
NodeWithID::EarlyEndRegexString(regex_strings.get_or_intern(®ex_string));
}
Node::RegexComplement(regex_complement) => {
let regex_complement = std::mem::take(regex_complement);
*parent = NodeWithID::RegexComplement(
regex_strings.get_or_intern(®ex_complement),
);
}
Node::Substrings(substrings) => {
let substrings = std::mem::take(substrings);
*parent = NodeWithID::Substrings(sub_strings.get_or_intern(&substrings));
}
Node::Nonterminal(nonterminal) => {
let nonterminal = std::mem::take(nonterminal);
*parent = NodeWithID::Nonterminal(nonterminals.get_or_intern(&nonterminal));
}
Node::Multiple(nodes) => {
let nodes = std::mem::take(nodes);
let mut buffer = Vec::with_capacity(nodes.len());
buffer.resize(nodes.len(), NodeWithID::Unknown);
*parent = NodeWithID::Multiple(buffer);
match parent {
NodeWithID::Multiple(new_nodes) => {
for (node, new_parent) in zip(nodes, new_nodes.iter_mut()) {
stack.push((node, new_parent));
}
}
_ => unreachable!(),
}
}
Node::RegexExt(node, e) => {
let node = mem::replace(node, Box::new(Node::Terminal(String::new())));
*parent = NodeWithID::RegexExt(Box::new(NodeWithID::Unknown), *e);
match parent {
NodeWithID::RegexExt(new_node, _) => {
stack.push((*node, new_node));
}
_ => unreachable!(),
}
}
Node::Symbol(lhs, symbol, rhs) => {
let lhs = mem::replace(lhs, Box::new(Node::Terminal(String::new())));
let rhs = mem::replace(rhs, Box::new(Node::Terminal(String::new())));
*parent = NodeWithID::Symbol(
Box::new(NodeWithID::Unknown),
*symbol,
Box::new(NodeWithID::Unknown),
);
match parent {
NodeWithID::Symbol(l, _, r) => {
stack.push((*lhs, l));
stack.push((*rhs, r));
}
_ => unreachable!(),
}
}
Node::Group(node) => {
let node = mem::replace(node, Box::new(Node::Terminal(String::new())));
*parent = NodeWithID::Group(Box::new(NodeWithID::Unknown));
match parent {
NodeWithID::Group(new_node) => {
stack.push((*node, new_node));
}
_ => unreachable!(),
}
}
}
}
new_expressions.push((lhs, rhs));
}
(
InternedStrings {
nonterminals,
terminals,
regex_strings,
sub_strings,
},
new_expressions
.into_iter()
.map(|(lhs, rhs)| ExpressionWithID { lhs, rhs })
.collect(),
)
}