1mod grammar;
2mod input_range;
3mod traversal;
4
5use crate::{tracing, ParseTree, ParseTreeNode, Term};
6use grammar::{ParseGrammar, Production};
7use input_range::InputRange;
8use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
9use traversal::{TermMatch, Traversal, TraversalId, TraversalTree};
10
11pub fn parse<'gram>(
12 grammar: &'gram crate::Grammar,
13 input: &'gram str,
14) -> impl Iterator<Item = ParseTree<'gram>> {
15 ParseTreeIter::new(grammar, input)
16}
17
18#[derive(Debug, Default)]
20struct TraversalQueue {
21 processed: HashSet<TraversalId>,
22 queue: VecDeque<TraversalId>,
23}
24
25impl TraversalQueue {
26 pub fn pop_front(&mut self) -> Option<TraversalId> {
27 self.queue.pop_front()
28 }
29 pub fn push_back(&mut self, id: TraversalId) {
30 if self.processed.insert(id) {
31 self.queue.push_back(id);
32 }
33 }
34 pub fn push_back_starting<'gram>(
36 &mut self,
37 traversal_tree: &mut TraversalTree<'gram>,
38 grammar: &ParseGrammar<'gram>,
39 starting_term: &'gram Term,
40 input: &InputRange<'gram>,
41 ) {
42 for starting_prod in grammar.get_productions_by_lhs(starting_term) {
43 let traversal = traversal_tree.predict_starting(starting_prod, input);
44 self.push_back(traversal.id);
45 }
46 }
47}
48
49fn parse_tree<'gram>(
51 traversal_tree: &TraversalTree<'gram>,
52 grammar: &ParseGrammar<'gram>,
53 traversal_id: TraversalId,
54) -> ParseTree<'gram> {
55 let production = {
56 let traversal = traversal_tree.get(traversal_id);
57 grammar.get_production_by_id(traversal.production_id)
58 };
59 let rhs = traversal_tree
60 .get_matched(traversal_id)
61 .map(|term_match| match term_match {
62 TermMatch::Terminal(term) => ParseTreeNode::Terminal(term),
63 TermMatch::Nonterminal(traversal_id) => {
64 ParseTreeNode::Nonterminal(parse_tree(traversal_tree, grammar, *traversal_id))
65 }
66 })
67 .collect::<Vec<ParseTreeNode>>();
68
69 ParseTree::new(production.lhs, rhs)
70}
71
72fn earley<'gram>(
75 queue: &mut TraversalQueue,
76 traversal_tree: &mut TraversalTree<'gram>,
77 completions: &mut CompletionMap<'gram>,
78 grammar: &ParseGrammar<'gram>,
79) -> Option<TraversalId> {
80 let _span = tracing::span!(tracing::Level::DEBUG, "earley").entered();
81 while let Some(traversal_id) = queue.pop_front() {
82 tracing::event!(
83 tracing::Level::TRACE,
84 "earley queue pop: {:#?}",
85 traversal_tree.get(traversal_id)
86 );
87
88 match traversal_tree.get_matching(traversal_id) {
89 Some(nonterminal @ Term::Nonterminal(_)) => {
90 let _span = tracing::span!(tracing::Level::DEBUG, "Predict").entered();
91
92 let traversal = traversal_tree.get(traversal_id);
93 let lhs = grammar.get_production_by_id(traversal.production_id).lhs;
94
95 completions.insert(traversal, lhs);
96
97 let input_range = traversal.input_range.clone();
98
99 for production in grammar.get_productions_by_lhs(nonterminal) {
100 let predicted = traversal_tree.predict(production, &input_range);
101 tracing::event!(tracing::Level::TRACE, "predicted: {predicted:#?}");
102 queue.push_back(predicted.id);
103 }
104
105 for completed in completions.get_complete(nonterminal, &input_range) {
106 let term_match = TermMatch::Nonterminal(completed);
107 let prior_completed = traversal_tree.match_term(traversal_id, term_match);
108 tracing::event!(
109 tracing::Level::TRACE,
110 "prior_completed: {prior_completed:#?}"
111 );
112 queue.push_back(prior_completed.id);
113 }
114 }
115 Some(Term::Terminal(term)) => {
116 let _span = tracing::span!(tracing::Level::DEBUG, "Scan").entered();
117 let traversal = traversal_tree.get(traversal_id);
118 if traversal.input_range.next().starts_with(term) {
119 let term_match = TermMatch::Terminal(term);
120 let scanned = traversal_tree.match_term(traversal_id, term_match);
121 tracing::event!(tracing::Level::TRACE, "scanned: {scanned:#?}");
122 queue.push_back(scanned.id);
123 }
124 }
125 None => {
126 let _span = tracing::span!(tracing::Level::DEBUG, "Complete").entered();
127
128 let traversal = traversal_tree.get(traversal_id);
129 let is_full_traversal =
130 traversal.is_starting && traversal.input_range.is_complete();
131 let lhs = grammar.get_production_by_id(traversal.production_id).lhs;
132
133 completions.insert(traversal, lhs);
134
135 for incomplete_traversal_id in completions.get_incomplete(lhs, traversal) {
136 let term_match = TermMatch::Nonterminal(traversal_id);
137 let completed = traversal_tree.match_term(incomplete_traversal_id, term_match);
138
139 tracing::event!(tracing::Level::TRACE, "completed: {completed:#?}");
140 queue.push_back(completed.id);
141 }
142
143 if is_full_traversal {
144 return Some(traversal_id);
145 }
146 }
147 }
148 }
149
150 None
151}
152
153#[derive(Debug)]
154struct ParseTreeIter<'gram> {
155 traversal_tree: TraversalTree<'gram>,
156 grammar: ParseGrammar<'gram>,
157 queue: TraversalQueue,
158 completions: CompletionMap<'gram>,
159}
160
161impl<'gram> ParseTreeIter<'gram> {
162 pub fn new(grammar: &'gram crate::Grammar, input: &'gram str) -> Self {
163 let starting_term = grammar
164 .starting_term()
165 .expect("Grammar must have one production to parse");
166
167 let grammar = ParseGrammar::new(grammar);
168
169 let input = InputRange::new(input);
170 let mut traversal_tree = TraversalTree::default();
171 let mut queue = TraversalQueue::default();
172 let completions = CompletionMap::default();
173
174 queue.push_back_starting(&mut traversal_tree, &grammar, starting_term, &input);
175
176 Self {
177 traversal_tree,
178 grammar,
179 queue,
180 completions,
181 }
182 }
183}
184
185impl<'gram> Iterator for ParseTreeIter<'gram> {
186 type Item = ParseTree<'gram>;
187 fn next(&mut self) -> Option<Self::Item> {
188 let Self {
189 queue,
190 completions,
191 grammar,
192 traversal_tree,
193 } = self;
194
195 earley(queue, traversal_tree, completions, grammar).map(|traversal_id| {
196 let _span = tracing::span!(tracing::Level::DEBUG, "next_parse_tree").entered();
197 let parse_tree = parse_tree(traversal_tree, grammar, traversal_id);
198 tracing::event!(tracing::Level::TRACE, "\n{parse_tree}");
199 parse_tree
200 })
201 }
202}
203#[derive(Debug, PartialEq, Eq, Hash)]
205pub(crate) struct CompletionKey<'gram> {
206 term: &'gram Term,
207 input_start: usize,
208}
209
210impl<'gram> CompletionKey<'gram> {
211 pub fn new_start(term: &'gram Term, input: &InputRange<'gram>) -> Self {
212 Self {
213 term,
214 input_start: input.offset.start,
215 }
216 }
217 pub fn new_total(term: &'gram Term, input: &InputRange<'gram>) -> Self {
218 Self {
219 term,
220 input_start: input.offset.total_len(),
221 }
222 }
223}
224
225#[derive(Debug, Default)]
226pub(crate) struct CompletionMap<'gram> {
227 incomplete: HashMap<CompletionKey<'gram>, BTreeSet<TraversalId>>,
228 complete: HashMap<CompletionKey<'gram>, BTreeSet<TraversalId>>,
229}
230
231impl<'gram> CompletionMap<'gram> {
232 pub fn get_incomplete(
233 &'_ self,
234 term: &'gram Term,
235 complete_traversal: &Traversal<'gram>,
236 ) -> impl Iterator<Item = TraversalId> + '_ {
237 let _span = tracing::span!(tracing::Level::DEBUG, "get_incomplete").entered();
238 let key = CompletionKey::new_start(term, &complete_traversal.input_range);
239 self.incomplete.get(&key).into_iter().flatten().cloned()
240 }
241 pub fn get_complete(
242 &'_ self,
243 term: &'gram Term,
244 input_range: &InputRange<'gram>,
245 ) -> impl Iterator<Item = TraversalId> + '_ {
246 let _span = tracing::span!(tracing::Level::DEBUG, "get_complete").entered();
247 let key = CompletionKey::new_total(term, input_range);
248 self.complete.get(&key).into_iter().flatten().cloned()
249 }
250 pub fn insert(&mut self, traversal: &Traversal<'gram>, lhs: &'gram Term) {
251 let _span = tracing::span!(tracing::Level::DEBUG, "insert").entered();
252 match traversal.next_unmatched() {
253 Some(Term::Terminal(_)) => {
254 }
256 Some(unmatched @ Term::Nonterminal(_)) => {
257 let key = CompletionKey::new_total(unmatched, &traversal.input_range);
258 self.incomplete.entry(key).or_default().insert(traversal.id);
259 }
260 None => {
261 let key = CompletionKey::new_start(lhs, &traversal.input_range);
262 self.complete.entry(key).or_default().insert(traversal.id);
263 }
264 }
265 }
266}
267
268#[cfg(test)]
269mod tests {
270 use super::*;
271 use crate::Grammar;
272 use quickcheck::{Arbitrary, Gen, QuickCheck, TestResult};
273
274 #[derive(Debug, Clone)]
275 struct NestedEmptyGrammar(Grammar);
276 impl Arbitrary for NestedEmptyGrammar {
277 fn arbitrary(g: &mut Gen) -> Self {
278 let mut grammar: Grammar = "
279 <start> ::= <a> <empty>
280 <a> ::= 'a' <empty>"
281 .parse()
282 .unwrap();
283
284 let mut expressions: Vec<_> = grammar
285 .productions_iter_mut()
286 .flat_map(|prod| prod.rhs_iter_mut())
287 .collect();
288
289 let expr_indexes: Vec<usize> = (0..expressions.len()).collect();
290 let expr_choice_index = g.choose(&expr_indexes).unwrap();
291 let expr_choice: &mut crate::Expression = expressions[*expr_choice_index];
292
293 let term_choice_indexes: Vec<usize> = (0..expr_choice.terms.len()).collect();
294 let term_choice_index = g.choose(&term_choice_indexes).unwrap();
295
296 expr_choice
297 .terms
298 .insert(*term_choice_index, Term::Nonterminal(String::from("empty")));
299
300 grammar.add_production("<empty> ::= ''".parse().unwrap());
301
302 Self(grammar)
303 }
304 }
305
306 fn prop_empty_rules_allow_parse(grammar: NestedEmptyGrammar) -> TestResult {
307 let input = "a";
308
309 let mut parses = parse(&grammar.0, input);
310 TestResult::from_bool(parses.next().is_some())
311 }
312
313 #[test]
314 fn empty_rules_allow_parse() {
315 QuickCheck::new()
316 .tests(1000)
317 .quickcheck(prop_empty_rules_allow_parse as fn(NestedEmptyGrammar) -> TestResult)
318 }
319}