1extern crate alloc;
39extern crate nom;
40extern crate parse_hyperlinks;
41use std::{iter::zip, mem};
42
43use expression::{Expression, ExpressionWithID};
44pub use grammar::Grammar;
45use node::NodeWithID;
46pub use node::{Node, RegexExtKind, SymbolKind};
47use string_interner::{backend::StringBackend, symbol::SymbolU32, StringInterner};
48
49pub mod config;
50mod expression;
51pub mod grammar;
52pub mod node;
53mod parser;
54pub mod regex;
55pub mod semantic_error;
56pub mod simplified_grammar;
57pub mod suffix_automaton;
58pub mod utils;
59pub mod validated_grammar;
60
61#[derive(Debug, Clone)]
62pub struct InternedStrings {
63 pub nonterminals: StringInterner<StringBackend<SymbolU32>>,
64 pub terminals: StringInterner<StringBackend<SymbolU32>>,
65 pub regex_strings: StringInterner<StringBackend<SymbolU32>>,
66 pub sub_strings: StringInterner<StringBackend<SymbolU32>>,
67}
68
69pub fn get_grammar(input: &str) -> Result<Grammar, nom::Err<nom::error::VerboseError<&str>>> {
84 let (interned_strings, expressions) = intern_strings(parser::parse_expressions(input)?.1);
85 Ok(Grammar {
86 interned_strings,
87 expressions,
88 })
89}
90
91fn intern_strings(expressions: Vec<Expression>) -> (InternedStrings, Vec<ExpressionWithID>) {
92 let mut nonterminals = StringInterner::<StringBackend<SymbolU32>>::new();
93 let mut terminals = StringInterner::<StringBackend<SymbolU32>>::new();
94 let mut regex_strings = StringInterner::<StringBackend<SymbolU32>>::new();
95 let mut sub_strings = StringInterner::<StringBackend<SymbolU32>>::new();
96 let mut new_expressions = vec![];
97 for expression in expressions {
98 let lhs = nonterminals.get_or_intern(expression.lhs);
99 let mut rhs = NodeWithID::Unknown;
100 let node = expression.rhs;
101 let mut stack = vec![(node, &mut rhs)];
102 while let Some((mut node, parent)) = stack.pop() {
103 match &mut node {
104 Node::Terminal(terminal) => {
105 let terminal = std::mem::take(terminal);
106 *parent = NodeWithID::Terminal(terminals.get_or_intern(&terminal));
107 }
108 Node::RegexString(regex_string) => {
109 let regex_string = std::mem::take(regex_string);
110 *parent = NodeWithID::RegexString(regex_strings.get_or_intern(®ex_string));
111 }
112 Node::EarlyEndRegexString(regex_string) => {
113 let regex_string = std::mem::take(regex_string);
114 *parent =
115 NodeWithID::EarlyEndRegexString(regex_strings.get_or_intern(®ex_string));
116 }
117 Node::RegexComplement(regex_complement) => {
118 let regex_complement = std::mem::take(regex_complement);
119 *parent = NodeWithID::RegexComplement(
120 regex_strings.get_or_intern(®ex_complement),
121 );
122 }
123 Node::Substrings(substrings) => {
124 let substrings = std::mem::take(substrings);
125 *parent = NodeWithID::Substrings(sub_strings.get_or_intern(&substrings));
126 }
127 Node::Nonterminal(nonterminal) => {
128 let nonterminal = std::mem::take(nonterminal);
129 *parent = NodeWithID::Nonterminal(nonterminals.get_or_intern(&nonterminal));
130 }
131 Node::Multiple(nodes) => {
132 let nodes = std::mem::take(nodes);
133 let mut buffer = Vec::with_capacity(nodes.len());
134 buffer.resize(nodes.len(), NodeWithID::Unknown);
135 *parent = NodeWithID::Multiple(buffer);
136 match parent {
137 NodeWithID::Multiple(new_nodes) => {
138 for (node, new_parent) in zip(nodes, new_nodes.iter_mut()) {
139 stack.push((node, new_parent));
140 }
141 }
142 _ => unreachable!(),
143 }
144 }
145 Node::RegexExt(node, e) => {
146 let node = mem::replace(node, Box::new(Node::Terminal(String::new())));
147 *parent = NodeWithID::RegexExt(Box::new(NodeWithID::Unknown), *e);
148 match parent {
149 NodeWithID::RegexExt(new_node, _) => {
150 stack.push((*node, new_node));
151 }
152 _ => unreachable!(),
153 }
154 }
155 Node::Symbol(lhs, symbol, rhs) => {
156 let lhs = mem::replace(lhs, Box::new(Node::Terminal(String::new())));
157 let rhs = mem::replace(rhs, Box::new(Node::Terminal(String::new())));
159 *parent = NodeWithID::Symbol(
160 Box::new(NodeWithID::Unknown),
161 *symbol,
162 Box::new(NodeWithID::Unknown),
163 );
164 match parent {
165 NodeWithID::Symbol(l, _, r) => {
166 stack.push((*lhs, l));
167 stack.push((*rhs, r));
168 }
169 _ => unreachable!(),
170 }
171 }
172 Node::Group(node) => {
173 let node = mem::replace(node, Box::new(Node::Terminal(String::new())));
174 *parent = NodeWithID::Group(Box::new(NodeWithID::Unknown));
175 match parent {
176 NodeWithID::Group(new_node) => {
177 stack.push((*node, new_node));
178 }
179 _ => unreachable!(),
180 }
181 }
182 }
183 }
184 new_expressions.push((lhs, rhs));
185 }
186 (
187 InternedStrings {
188 nonterminals,
189 terminals,
190 regex_strings,
191 sub_strings,
192 },
193 new_expressions
194 .into_iter()
195 .map(|(lhs, rhs)| ExpressionWithID { lhs, rhs })
196 .collect(),
197 )
198}