libunseemly/
earley.rs

1#![allow(non_upper_case_globals)]
2
3// An Earley parser!
4// Partly based on advice from http://loup-vaillant.fr/tutorials/earley-parsing/.
5// Partly based on https://arxiv.org/abs/1102.2003
6//  (though I'll save you the trouble of reading that one:
7//    it can be summed up as "Just add the current grammar to the Earley rules,
8//                             and everything will work out fine.")
9// Why Earley?
10//  * Earley parses arbitrary CFLs, which are
11//    - a category of languages I can comprehend,
12//    - and closed under composition (though ambiguity can creep in)
13//  * Earley has a pretty good error message story (TODO: check that this is true)
14//  * Earley maxes out at O(n^3) time†, and for practical grammars tends to be linear
15//       †technically, the reflective bit makes parsing NP-complete, but No One Will Do That.
16//
17// Also, it turns out that implementing an Earley parser goes pretty smoothly. Yay!
18
19use crate::{
20    ast,
21    ast::{Ast, LocatedAst},
22    ast_walk::LazyWalkReses,
23    grammar::{
24        FormPat::{self, *},
25        SynEnv,
26    },
27    name::*,
28    util::{assoc::Assoc, mbe::EnvMBE},
29};
30use std::{cell::RefCell, collections::HashMap, rc::Rc};
31
32// TODO: This UniqueId stuff is great, but we could make things faster
33//  by storing array indices instead
34
35thread_local! {
36    static next_id: RefCell<u32> = RefCell::new(0);
37
38    // TODO: instead of indexing by unique cell, we should intern `ParseContext`s
39    //  for fast (and not just pointer-based) comparison.
40    static all_parse_contexts: RefCell<HashMap<UniqueIdRef, ParseContext>>
41        = RefCell::new(HashMap::new());
42
43    // For parse error reporting: how far have we gotten?
44    static best_token: RefCell<(usize, Rc<FormPat>, usize)>
45        = RefCell::new((0, Rc::new(Impossible), 0));
46
47    pub static files: RefCell<codespan_reporting::files::SimpleFiles<String, String>>
48        = RefCell::new(codespan_reporting::files::SimpleFiles::new())
49}
50
51fn get_next_id() -> UniqueId {
52    next_id.with(|id| {
53        let res = UniqueId(*id.borrow());
54        *id.borrow_mut() += 1;
55        res
56    })
57}
58
59// Specifically *not* `Clone` or `Copy`
60#[derive(PartialEq, Eq)]
61pub struct UniqueId(u32);
62
63impl std::fmt::Debug for UniqueId {
64    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "{}", self.0) }
65}
66
67#[derive(PartialEq, Eq, Clone, Copy, Hash)]
68pub struct UniqueIdRef(u32);
69
70impl UniqueId {
71    fn get_ref(&self) -> UniqueIdRef { UniqueIdRef(self.0) }
72
73    fn is(&self, other: UniqueIdRef) -> bool { self.0 == other.0 }
74}
75
76impl std::fmt::Debug for UniqueIdRef {
77    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "{}", self.0) }
78}
79
80// TODO: We should probably refactor to use `ParseContext`
81//  everywhere we currently use these two things together (particularly `earley.rs`).
82custom_derive! {
83    #[derive(Clone, Debug)]
84    pub struct ParseContext {
85        pub grammar: SynEnv,
86        pub type_ctxt: LazyWalkReses<crate::ty::SynthTy>,
87        pub eval_ctxt: LazyWalkReses<crate::runtime::eval::Eval>
88    }
89}
90
91impl crate::runtime::reify::Reifiable for ParseContext {
92    fn ty_name() -> Name { n("Language") }
93
94    fn reify(&self) -> crate::runtime::eval::Value {
95        crate::runtime::eval::Value::ParseContext(Box::new(self.clone()))
96    }
97
98    fn reflect(v: &crate::runtime::eval::Value) -> ParseContext {
99        extract!((v) crate::runtime::eval::Value::ParseContext = (ref lang)
100            => (**lang).clone())
101    }
102}
103
104impl PartialEq for ParseContext {
105    fn eq(&self, other: &ParseContext) -> bool {
106        self as *const ParseContext == other as *const ParseContext
107    }
108}
109
110impl ParseContext {
111    pub fn new_from_grammar(se: SynEnv) -> ParseContext {
112        ParseContext {
113            grammar: se,
114            // TODO: uh, I think this is unused?
115            type_ctxt: LazyWalkReses::<crate::ty::SynthTy>::new_empty(),
116            eval_ctxt: LazyWalkReses::<crate::runtime::eval::Eval>::new_empty(),
117        }
118    }
119    pub fn with_grammar(self, se: SynEnv) -> ParseContext { ParseContext { grammar: se, ..self } }
120}
121
122// Hey, this doesn't need to be Reifiable!
123pub struct Item {
124    /// Where (in the token input stream) this rule tried to start matching
125    start_idx: usize,
126    /// What are we trying to match?
127    rule: Rc<FormPat>,
128    /// The location of the • in the rule. Most rules are very short
129    pos: usize,
130    /// The current grammar, so we can interperate `Call` rules,
131    ///  and environments for typing/evaluating syntax extensions
132    pc: Rc<ParseContext>,
133
134    // -- Just for error messages --
135    /// This rule is too commonplace to be informative in a parse error
136    common: bool,
137    /// What file are we in?
138    file_id: usize,
139
140    // -- Everything after this line is nonstandard, and is just here as an optimization--
141    /// Identity for the purposes of `wanted_by` and `local_parse`
142    id: UniqueId,
143
144    /// Can this complete things that have asked for it?
145    /// This is mostly a convenience.
146    done: RefCell<bool>, // Can this complete things that have asked for it?
147
148    /// For simple (leaf) nodes, we just store the resulting parse right away.
149    /// This is a convenience so that extracting the parse tree
150    ///  doesn't require trawling through the token input
151    /// For non-leaf nodes, store the ID of the item that provides justification
152    local_parse: RefCell<LocalParse>,
153
154    /// Traditionally, when a rule is completed in Earley parsing,
155    ///  one looks to try to find all the rules that could have asked for it.
156    /// That's expensive! But keeping back-pointers instead has a timing problem:
157    ///  we might create a new item with an advanced • (possibly one token ahead)
158    ///   before we know everything that called for the original item.
159    /// Therefore, we make sure that all items that start in the same place
160    ///   with the same rule/grammar
161    ///  share the same set of origins via `RefCell`.
162    ///
163    /// This, uh, might be an excessively cocky design.
164    /// But searching item lists is *hard* when your Earley parser
165    ///  has so many different kinds of rules!
166    wanted_by: Rc<RefCell<Vec<UniqueIdRef>>>,
167}
168
169/// Information for parsing. It's not a parse tree, but it tells you the next step to get one.
170/// (Hence "local")
171#[derive(PartialEq, Debug, Clone)]
172enum LocalParse {
173    /// ⊤; no information yet
174    NothingYet,
175    JustifiedByItemPlanB(UniqueIdRef), // Looking for a better parse, though... (for `Biased`)
176    JustifiedByItem(UniqueIdRef),
177    /// This might be more than an atom, but *only* in the rare `Anyways` case.
178    ParsedAtom(Ast),
179    /// ⊥; contradiction (TMI!)
180    Ambiguous(Box<LocalParse>, Box<LocalParse>),
181}
182use self::LocalParse::*;
183
184impl PartialOrd for LocalParse {
185    /// Establish a lattice for `LocalParse`; some parses are better than others.
186    /// `Biased` allows one to find a "Plan B" parse that gets overwritten by "Plan A".
187    /// But there's also `NothingYet`, for ... (TODO: only leaves and just-started nodes?)
188    /// ... and `Ambiguous`, when we know that there are multiple justifications for a single node
189    fn partial_cmp(&self, other: &LocalParse) -> Option<std::cmp::Ordering> {
190        use std::cmp::Ordering::*;
191        if self == other {
192            return Some(Equal);
193        }
194        match (self, other) {
195            (&NothingYet, _) | (_, &Ambiguous(_, _)) => Some(Less),
196            (&Ambiguous(_, _), _) | (_, &NothingYet) => Some(Greater),
197            (&JustifiedByItemPlanB(_), &JustifiedByItem(_)) => Some(Less),
198            (&JustifiedByItem(_), &JustifiedByItemPlanB(_)) => Some(Greater),
199            (&JustifiedByItem(_), &JustifiedByItem(_)) => None,
200            // semantically, this ought to be `None`, but that would be a hard-to-debug logic error
201            _ => icp!("Attempted to compare {:#?} and {:#?}", self, other),
202        }
203    }
204}
205
206impl Clone for Item {
207    fn clone(&self) -> Item {
208        Item {
209            start_idx: self.start_idx,
210            rule: self.rule.clone(),
211            pos: self.pos,
212            pc: self.pc.clone(),
213            common: self.common,
214            file_id: self.file_id,
215            id: get_next_id(),
216            done: self.done.clone(),
217            local_parse: RefCell::new(LocalParse::NothingYet),
218            wanted_by: self.wanted_by.clone(),
219        }
220    }
221}
222
223/// Progress through the state sets
224// TODO: this ought to produce an Option<ParseError>, not a bool!
225fn create_chart(
226    rule: Rc<FormPat>,
227    pc: ParseContext,
228    toks: &str,
229    file_id: usize,
230) -> (UniqueId, Vec<Vec<Item>>) {
231    let toks = toks.trim(); // HACK: tokens don't consume trailing whitespace
232    let mut chart: Vec<Vec<Item>> = vec![];
233    chart.resize_with(toks.len() + 1, std::default::Default::default);
234
235    let start_but_startier = get_next_id();
236
237    let start_item = Item {
238        start_idx: 0,
239        rule: rule,
240        pos: 0,
241        pc: Rc::new(pc),
242        common: false,
243        file_id: file_id,
244        id: get_next_id(),
245        done: RefCell::new(false),
246        local_parse: RefCell::new(LocalParse::NothingYet),
247        wanted_by: Rc::new(RefCell::new(vec![start_but_startier.get_ref()])),
248    };
249
250    chart[0].push(start_item);
251
252    for cur_tok in 0..toks.len() {
253        walk_tt(&mut chart, &toks, cur_tok)
254    }
255
256    examine_state_set(&mut chart, &toks, toks.len()); // One last time, for nullable rules at the end
257
258    (start_but_startier, chart)
259}
260
261/// Recognize `rule` in `grammar` (but assume no code will need to be executed)
262/// For testing only; doesn't set the filename properly!
263fn recognize(rule: &FormPat, grammar: &SynEnv, toks: &str) -> bool {
264    let (start_but_startier, chart) = create_chart(
265        Rc::new(rule.clone()),
266        ParseContext::new_from_grammar(grammar.clone()),
267        toks,
268        0,
269    );
270
271    chart[chart.len() - 1].iter().any(|item| {
272        (*item.wanted_by.borrow()).iter().any(|idr| start_but_startier.is(*idr))
273            && *item.done.borrow()
274    })
275}
276
277fn walk_tt(chart: &mut Vec<Vec<Item>>, toks: &str, cur_tok: usize) {
278    examine_state_set(chart, toks, cur_tok);
279    // log!("\n  {:#?}\n->{:#?}\n", chart[*cur_tok], chart[*cur_tok + 1]);
280}
281
282/// Progresses a state set until it won't go any further.
283/// Returns the state set for the next token.
284fn examine_state_set(chart: &mut Vec<Vec<Item>>, toks: &str, cur_tok: usize) {
285    // If nullable items are statically identified, I think there's an optimization
286    //  where we don't re-walk old items
287    while new_items_from_state_set(chart, toks, cur_tok) {} // Until a fixpoint is reached
288}
289
290fn new_items_from_state_set(chart: &mut Vec<Vec<Item>>, toks: &str, cur_tok: usize) -> bool {
291    let mut effect = false;
292    for idx in 0..chart[cur_tok].len() {
293        for (new_item, adv) in chart[cur_tok][idx].examine(toks, cur_tok, chart) {
294            effect = merge_into_state_set(new_item, &mut chart[cur_tok + adv]) || effect;
295        }
296    }
297    effect
298}
299
300// Returns whether anything happened
301fn merge_into_state_set(item: Item, items: &mut Vec<Item>) -> bool {
302    for i in items.iter() {
303        if i.similar(&item) {
304            if i.as_good_as(&item) {
305                return false; // no new information
306            }
307            log!("improved item: {:#?} vs. {:#?}\n", item, i);
308            i.merge(&item);
309            return true;
310        }
311    }
312    log!("new item: {:#?}\n", item);
313    items.push(item);
314
315    true
316}
317
318impl std::fmt::Debug for Item {
319    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
320        write!(
321            f,
322            "[({:?}){}({}.{}){}<{:?} - {:?}>]",
323            self.id,
324            self.start_idx,
325            self.rule,
326            self.pos,
327            if *self.done.borrow() { "✓" } else { "…" },
328            self.local_parse.borrow(),
329            self.wanted_by.borrow()
330        )
331    }
332}
333
334impl Item {
335    /// This is pointer equality on `rule` and `grammar` for speed.
336    /// Also, it intentionally ignores `done`, `local_parse`, and `wanted_by`,
337    ///  because those should be merged.
338    fn similar<'f>(&'f self, other: &'f Item) -> bool {
339        self.start_idx == other.start_idx
340            && &*self.rule as *const FormPat == &*other.rule as *const FormPat
341            && self.pos == other.pos
342            && self.pc.grammar.almost_ptr_eq(&other.pc.grammar)
343    }
344
345    /// `false` if `other` might provide new information
346    /// `true` if `other` definitely provides no new information
347    /// (this is conservative regarding `wanted_by`)
348    fn as_good_as<'f>(&'f self, other: &'f Item) -> bool {
349        assert!(self.similar(other));
350        (*self.done.borrow() == *other.done.borrow() || !*other.done.borrow()) // no more done?
351        && (*other.local_parse.borrow() <= *self.local_parse.borrow() ) // no "better" parse?
352        && (other.wanted_by.borrow().len() == 0 // no more wanted?
353            || (other.wanted_by.borrow().iter().all(
354                   |w| self.wanted_by.borrow().iter().any(|s_w| w == s_w))))
355    }
356
357    fn merge(&self, other: &Item) {
358        if *other.done.borrow() {
359            *self.done.borrow_mut() = true;
360        }
361
362        use std::cmp::Ordering::*;
363        let comparison = other.local_parse.borrow().partial_cmp(&*self.local_parse.borrow());
364        log!(
365            "\n(For {}) Merging {:?} and {:?}... ",
366            self.id.0,
367            *other.local_parse.borrow(),
368            *self.local_parse.borrow()
369        );
370        match comparison {
371            Some(Greater) => {
372                *self.local_parse.borrow_mut() = other.local_parse.borrow().clone();
373            }
374            Some(Equal) | Some(Less) => { /* no new information */ }
375            None => {
376                // TODO: maybe it would be nice to ignore ambiguities that "don't matter".
377                // But even if `local_parse` is different, the resulting `Ast` can be the same.
378                // So, maybe have `Ambiguous` store all possible ambiguities
379                let amb = LocalParse::Ambiguous(
380                    Box::new(self.local_parse.borrow().clone()),
381                    Box::new(other.local_parse.borrow().clone()),
382                );
383                *self.local_parse.borrow_mut() = amb;
384            }
385        }
386        log!("... into {:#?}\n", *self.local_parse.borrow());
387
388        for other_wanted in other.wanted_by.borrow().iter() {
389            let mut has = false;
390            for self_wanted in self.wanted_by.borrow().iter() {
391                if self_wanted == other_wanted {
392                    has = true;
393                    break;
394                }
395            }
396
397            if !has {
398                self.wanted_by.borrow_mut().push(*other_wanted)
399            }
400        }
401    }
402
403    // -----------------------------------------------------------
404    // These methods all make a (singleton) set of items after progressing `self` somehow
405    // TODO: I have a lot of "like <one of these>, but" comments around this file...
406    //
407
408    fn finish_with(&self, lp: LocalParse, toks_consumed: usize) -> Vec<(Item, usize)> {
409        log!("[fin_w/ ({})]", self.id.0);
410        vec![(
411            Item {
412                done: RefCell::new(true),
413                local_parse: RefCell::new(lp),
414                pos: self.pos + 1,
415                ..self.clone()
416            },
417            toks_consumed,
418        )]
419    }
420
421    fn start(&self, rule: &Rc<FormPat>, cur_idx: usize) -> Vec<(Item, usize)> {
422        log!("[start ({})]", self.id.0);
423        vec![(
424            Item {
425                start_idx: cur_idx,
426                rule: rule.clone(),
427                pos: 0,
428                done: RefCell::new(false),
429                pc: self.pc.clone(),
430                common: self.common,
431                file_id: self.file_id,
432                local_parse: RefCell::new(LocalParse::NothingYet),
433                id: get_next_id(),
434                wanted_by: Rc::new(RefCell::new(vec![self.id.get_ref()])),
435            },
436            0,
437        )]
438    }
439
440    // -----------------------------------------------------------
441
442    /// See what new items this item justifies
443    fn examine(&self, toks: &str, cur_idx: usize, chart: &[Vec<Item>]) -> Vec<(Item, usize)> {
444        let mut res = if *self.done.borrow() {
445            let mut waiting_satisfied = vec![];
446
447            log!("({:#?}) done; {} items want it\n", self, (*self.wanted_by.borrow()).len());
448
449            for &waiting_item_id in self.wanted_by.borrow().iter() {
450                if let Some(waiting_item) =
451                    chart[self.start_idx].iter().find(|i| i.id.is(waiting_item_id))
452                {
453                    // It's `None` if it's the startier item
454
455                    let me_justif = JustifiedByItem(self.id.get_ref());
456
457                    // We `finish_with` here for things that are waiting on other items,
458                    //  in `shift_or_predict` for leaves.
459                    // Except for `Seq`. TODO: why?
460                    let mut more = match *waiting_item.rule {
461                        Anyways(_) | Impossible | Scan(_, _) => {
462                            icp!("{:#?} should not be waiting for anything!", waiting_item)
463                        }
464                        Seq(ref subs) => {
465                            if (waiting_item.pos) as usize == subs.len() {
466                                vec![]
467                            } else {
468                                // Like `waiting_item.advance`, but with a local_parse
469                                vec![(
470                                    Item {
471                                        pos: waiting_item.pos + 1,
472                                        local_parse: RefCell::new(me_justif),
473                                        ..waiting_item.clone()
474                                    },
475                                    0,
476                                )]
477                            }
478                        }
479                        Plus(_) | Star(_) => {
480                            // It'll also keep going, though!
481                            waiting_item.finish_with(me_justif, 0)
482                        }
483                        SynImport(_, _, _) if waiting_item.pos == 0 => vec![(
484                            Item {
485                                pos: 1,
486                                local_parse: RefCell::new(me_justif),
487                                ..waiting_item.clone()
488                            },
489                            0,
490                        )],
491                        VarRef(_)
492                        | Alt(_)
493                        | Call(_)
494                        | Scope(_, _)
495                        | Pick(_, _)
496                        | Named(_, _)
497                        | SynImport(_, _, _)
498                        | NameImport(_, _)
499                        | NameImportPhaseless(_, _)
500                        | QuoteDeepen(_, _)
501                        | QuoteEscape(_, _)
502                        | Common(_) => waiting_item.finish_with(me_justif, 0),
503                        // Using `c_parse` instead of `local_parse` here is weird,
504                        //  but probably necessary to allow `Call` under `Reserved`.
505                        Reserved(_, ref name_list) => match self.c_parse(chart, cur_idx) {
506                            Ok(ast_with_name) => match ast_with_name.c() {
507                                ast::Atom(name) | ast::VariableReference(name) => {
508                                    if name_list.contains(&name) {
509                                        vec![]
510                                    } else {
511                                        waiting_item.finish_with(me_justif, 0)
512                                    }
513                                }
514                                _ => {
515                                    log!("found something unusual {:?}", ast_with_name);
516                                    vec![]
517                                }
518                            },
519                            Err(_) => {
520                                log!("passing an error through `Reserved`");
521                                waiting_item.finish_with(me_justif, 0)
522                            }
523                        },
524                        Literal(_, expected) => match self.c_parse(chart, cur_idx) {
525                            Ok(ast) => {
526                                if ast.c() == &ast::Atom(expected) {
527                                    waiting_item.finish_with(me_justif, 0)
528                                } else {
529                                    vec![]
530                                }
531                            }
532                            _other => {
533                                log!("passing an error through `Literal`");
534                                waiting_item.finish_with(me_justif, 0)
535                            }
536                        },
537                        Biased(ref _plan_a, ref plan_b) => {
538                            if &*self.rule as *const FormPat == &**plan_b as *const FormPat {
539                                waiting_item.finish_with(JustifiedByItemPlanB(self.id.get_ref()), 0)
540                            } else {
541                                waiting_item.finish_with(me_justif, 0)
542                            }
543                        }
544                    };
545
546                    waiting_satisfied.append(&mut more);
547                }
548            }
549            waiting_satisfied
550        } else {
551            vec![]
552        };
553
554        if !res.is_empty() {
555            if let Call(_) = *res[0].0.rule {
556                // HACK: I think that `Call` is uninformative
557            } else if !self.common {
558                // Possibly set the high-water mark for parse error purposes:
559                best_token.with(|bt| {
560                    if cur_idx > bt.borrow().0 {
561                        // We've parsed further than ever before
562                        *bt.borrow_mut() = (cur_idx, res[0].0.rule.clone(), res[0].0.pos)
563                    } else if cur_idx == bt.borrow().0 && res[0].0.pos > bt.borrow().2 {
564                        // We're deeper into a rule than ever before
565                        *bt.borrow_mut() = (cur_idx, res[0].0.rule.clone(), res[0].0.pos)
566                    }
567                });
568            }
569        }
570
571        res.append(&mut self.shift_or_predict(toks, cur_idx, chart));
572
573        res
574    }
575
576    fn shift_or_predict(
577        &self,
578        toks: &str,
579        cur_idx: usize,
580        chart: &[Vec<Item>],
581    ) -> Vec<(Item, usize)> {
582        // Try to shift (bump `pos`, or set `done`) or predict (`start` a new item)
583        match (self.pos, &*(self.rule.clone())) {
584            // TODO: is there a better way to match in `Rc`?
585            (0, &Anyways(ref a)) => self.finish_with(ParsedAtom(a.clone()), 0),
586            (_, &Impossible) => vec![],
587            (0, &Literal(ref sub, _)) => self.start(sub, cur_idx),
588            (0, &Scan(crate::grammar::Scanner(ref regex), _)) => {
589                let mut caps = regex.capture_locations();
590                if regex.captures_read(&mut caps, &toks[cur_idx..]).is_some() {
591                    match caps.get(1) {
592                        Some((start, end)) => {
593                            // These are byte indices!
594                            self.finish_with(
595                                ParsedAtom(Ast(Rc::new(LocatedAst {
596                                    c: ast::Atom(n(&toks[cur_idx + start..cur_idx + end])),
597                                    file_id: self.file_id,
598                                    begin: cur_idx + start,
599                                    end: cur_idx + end,
600                                }))),
601                                caps.get(0).unwrap().1, // End of the *whole string consumed*
602                            )
603                        }
604                        None => self.finish_with(NothingYet, caps.get(0).unwrap().1),
605                    }
606                } else {
607                    vec![]
608                }
609            }
610            (0, &VarRef(ref sub)) => self.start(sub, cur_idx),
611            (pos, &Seq(ref subs)) => {
612                if pos < subs.len() {
613                    self.start(&subs[pos as usize], cur_idx)
614                } else if pos == subs.len() {
615                    // a little like `.finish`, but without advancing
616                    vec![(Item { done: RefCell::new(true), ..self.clone() }, 0)]
617                } else {
618                    vec![]
619                }
620            }
621            (_, &Star(ref sub)) => {
622                // Special case: the elegant thing would be to create `Star` pre-`done`
623                let mut res = if self.pos == 0 {
624                    // Like `.finish`, but without advancing
625                    vec![(Item { done: RefCell::new(true), ..self.clone() }, 0)]
626                } else {
627                    vec![]
628                };
629                res.append(&mut self.start(&sub, cur_idx)); // But we can take more!
630                res
631            }
632            (_, &Plus(ref sub)) => self.start(&sub, cur_idx),
633            (0, &Alt(ref subs)) => {
634                let mut res = vec![];
635                for sub in subs {
636                    res.append(&mut self.start(&(*sub), cur_idx));
637                }
638                res
639            }
640            // Needs special handling elsewhere!
641            (0, &Biased(ref plan_a, ref plan_b)) => {
642                let mut res = self.start(&plan_a, cur_idx);
643                res.append(&mut self.start(&plan_b, cur_idx));
644                res
645            }
646            (0, &Call(n)) => self.start(&self.pc.grammar.find_or_panic(&n), cur_idx),
647            (0, &Scope(ref f, _)) => {
648                // form.grammar is a FormPat. Confusing!
649                self.start(&f.grammar, cur_idx)
650            }
651            (0, &Pick(ref body, _)) => self.start(&body, cur_idx),
652            (0, &SynImport(ref lhs, _, _)) => self.start(&lhs, cur_idx),
653            (1, &SynImport(_, ref body, ref f)) => {
654                // TODO: handle errors properly! Probably need to memoize, also!
655                // TODO: an ambiguity or error here leads to a pretty confusing parse failure.
656                // It ought to be an outright parse error,
657                //  even if it theoretically could get papered over,
658                //  but we don't have a way to communicate those here.
659                let partial_parse = match *self.local_parse.borrow() {
660                    NothingYet => return vec![],
661                    Ambiguous(_, _) => {
662                        println!(
663                            "Warning: ambiguity in syntax import! {:?}",
664                            *self.local_parse.borrow()
665                        );
666                        return vec![];
667                    }
668                    ParsedAtom(ref a) => a.clone(),
669                    JustifiedByItem(_) | JustifiedByItemPlanB(_) => {
670                        match self
671                            .find_wanted(chart, cur_idx)
672                            .map(|item| item.c_parse(chart, cur_idx))
673                        {
674                            Ok(Ok(ast)) => ast,
675                            e => {
676                                println!("Warning: error in syntax import! {:?}", e);
677                                return vec![];
678                            }
679                        }
680                    }
681                };
682
683                // We can't use `.or_insert_with()` here,
684                //  since "import" can encounter grammars with extensions while extending!
685                let existing_pc = all_parse_contexts
686                    .with(|grammars| grammars.borrow().get(&self.id.get_ref()).cloned());
687
688                let new_ctxt = match existing_pc {
689                    Some(pc) => pc,
690                    None => {
691                        let new_ctxt = f.0((*self.pc).clone(), partial_parse);
692                        all_parse_contexts.with(|grammars| {
693                            grammars.borrow_mut().insert(self.id.get_ref(), new_ctxt.clone())
694                        });
695                        new_ctxt
696                    }
697                };
698
699                vec![(
700                    Item {
701                        start_idx: cur_idx,
702                        rule: body.clone(),
703                        pos: 0,
704                        done: RefCell::new(false),
705                        pc: Rc::new(new_ctxt),
706                        common: false,
707                        file_id: self.file_id,
708                        local_parse: RefCell::new(LocalParse::NothingYet),
709                        id: get_next_id(),
710                        wanted_by: Rc::new(RefCell::new(vec![self.id.get_ref()])),
711                    },
712                    0,
713                )]
714            }
715            (0, &Named(_, ref body))
716            | (0, &NameImport(ref body, _))
717            | (0, &NameImportPhaseless(ref body, _))
718            | (0, &QuoteDeepen(ref body, _))
719            | (0, &QuoteEscape(ref body, _))
720            | (0, &Reserved(ref body, _)) => self.start(&body, cur_idx),
721            (0, &Common(ref body)) => {
722                let mut res = self.start(&body, cur_idx);
723                res[0].0.common = true; // Only has one element
724                res
725            }
726            // Rust rightly complains that this is unreachable; yay!
727            // But how do I avoid a catch-all pattern for the pos > 0 case?
728            //(0, _) =>  { icp!("unhandled FormPat") },
729            _ => vec![], // end of a rule
730        }
731    }
732
733    fn find_wanted<'f, 'c>(
734        &'f self,
735        chart: &'c [Vec<Item>],
736        done_tok: usize,
737    ) -> Result<&'c Item, ParseError> {
738        let mut first_found: Option<&Item> = None;
739        let local_parse = self.local_parse.borrow().clone();
740        let desired_id = match local_parse {
741            JustifiedByItem(id) | JustifiedByItemPlanB(id) => id,
742            Ambiguous(ref l, ref r) => {
743                // HACK: this is quite ugly!
744                let l = *l.clone();
745                let r = *r.clone();
746                log!("===Ambiguity===\n");
747                // Show both parses...
748                *self.local_parse.borrow_mut() = l;
749                let l_res = self.c_parse(chart, done_tok).unwrap();
750                *self.local_parse.borrow_mut() = r;
751                let r_res = self.c_parse(chart, done_tok).unwrap();
752                return Err(ParseError {
753                    msg: format!("Ambiguity! \n=L=>{}\n=R=>{}\n", l_res, r_res),
754                });
755            }
756            _ => icp!("tried to parse unjustified item: {:#?} ", self),
757        };
758        log!("We are {:#?} at {}...\n", self, done_tok);
759
760        for idx in 0..chart[done_tok].len() {
761            let i = &chart[done_tok][idx];
762
763            if i.id.is(desired_id) {
764                match first_found {
765                    None => {
766                        first_found = Some(i);
767                    }
768                    Some(_) => icp!("unacknowledged ambiguity!"),
769                }
770            }
771        }
772
773        Ok(first_found.expect("ICP: no parse after successful recognition"))
774    }
775
776    /// Put location information on an `AstContents`, forming an `Ast`.
777    fn locate(&self, done_tok: usize, c: ast::AstContents) -> Ast {
778        Ast(Rc::new(ast::LocatedAst {
779            c: c,
780            file_id: self.file_id,
781            begin: self.start_idx,
782            end: done_tok,
783        }))
784    }
785
786    /// After the chart is built, we parse...
787    fn c_parse(&self, chart: &[Vec<Item>], done_tok: usize) -> ParseResult {
788        log!("Tring to parse {:#?}...\n", self);
789        // assert!(*self.done.borrow()); // false during ambiguity reporting
790        let res = match *self.rule {
791            Anyways(ref a) => Ok(a.clone()),
792            Impossible => icp!("Parser parsed the impossible!"),
793            Scan(_, _) => match self.local_parse.borrow().clone() {
794                ParsedAtom(a) => Ok(a),
795                NothingYet => Ok(ast!((trivial))), // TODO: should we fake location info here?
796                _ => icp!(),
797            },
798            VarRef(_) => {
799                let var_ast = self.find_wanted(chart, done_tok)?.c_parse(chart, done_tok)?;
800                match var_ast.c() {
801                    ast::Atom(a) => Ok(var_ast.with_c(ast::VariableReference(*a))),
802                    _ => icp!("no atom saved"),
803                }
804            }
805            Literal(_, _) | Alt(_) | Biased(_, _) | Call(_) | Reserved(_, _) | Common(_) => {
806                self.find_wanted(chart, done_tok)?.c_parse(chart, done_tok)
807            }
808            Seq(_) | Star(_) | Plus(_) | SynImport(_, _, _) => {
809                let mut step = self;
810                let mut subtrees: Vec<Ast> = vec![];
811                let mut pos = done_tok;
812                // Walk over "previous incarnations" of `self`
813                // TODO: It seems like I often have had the thought
814                //  "Why not put a nullable pattern under `Star`? What could go wrong?"
815                // Do something about that...
816                loop {
817                    log!("Trying to take a step...\n");
818
819                    // Special case: we can't start the loop because there are 0 children
820                    if let NothingYet = step.local_parse.borrow().clone() {
821                        break;
822                    }
823
824                    let sub = step.find_wanted(chart, pos)?;
825                    subtrees.push(sub.c_parse(chart, pos)?);
826                    if sub.start_idx == self.start_idx && step.pos == 1 {
827                        break;
828                    } else {
829                        pos = sub.start_idx;
830                        let mut found = false;
831                        for idx in 0..chart[pos].len() {
832                            let i = &chart[pos][idx];
833                            log!("Checking {:#?}\n", i);
834                            if self.pc.grammar.almost_ptr_eq(&i.pc.grammar)
835                                && &*self.rule as *const FormPat == &*i.rule as *const FormPat
836                                && step.pos - 1 == i.pos
837                            {
838                                step = i;
839                                found = true;
840                                break;
841                            }
842                        }
843                        if !found {
844                            icp!("Can't find item previous to {:#?}", step)
845                        }
846                    }
847                }
848                subtrees.reverse();
849
850                Ok(self.locate(done_tok, match *self.rule {
851                    Seq(_) | SynImport(_, _, _) => ast::Shape(subtrees),
852                    Star(_) | Plus(_) => ast::IncompleteNode(EnvMBE::new_from_anon_repeat(
853                        subtrees.into_iter().map(|a| a.flatten()).collect(),
854                    )),
855                    _ => icp!("seriously, this can't happen"),
856                }))
857            }
858            Named(name, _) => {
859                let sub_parsed = self.find_wanted(chart, done_tok)?.c_parse(chart, done_tok)?;
860                Ok(sub_parsed.with_c(ast::IncompleteNode(EnvMBE::new_from_leaves(Assoc::single(
861                    name,
862                    sub_parsed.clone(),
863                )))))
864            }
865            Scope(ref form, ref export) => {
866                let sub_parsed = self.find_wanted(chart, done_tok)?.c_parse(chart, done_tok)?;
867                // TODO #14: We should add zero-length repeats of missing `Named`s,
868                Ok(sub_parsed.with_c(ast::Node(form.clone(), sub_parsed.flatten(), export.clone())))
869            }
870            Pick(_, name) => {
871                let sub_parsed = self.find_wanted(chart, done_tok)?.c_parse(chart, done_tok)?;
872                sub_parsed
873                    .flatten()
874                    .get_leaf(name)
875                    .ok_or_else(|| ParseError {
876                        msg: format!("Nothing named {} in {:?}", name, sub_parsed),
877                    })
878                    .map(std::clone::Clone::clone)
879            }
880            NameImport(_, ref beta) => {
881                let sub_parsed = self.find_wanted(chart, done_tok)?.c_parse(chart, done_tok)?;
882                Ok(sub_parsed.with_c(ast::ExtendEnv(sub_parsed.clone(), beta.clone())))
883            }
884            NameImportPhaseless(_, ref beta) => {
885                let sub_parsed = self.find_wanted(chart, done_tok)?.c_parse(chart, done_tok)?;
886                Ok(sub_parsed.with_c(ast::ExtendEnvPhaseless(sub_parsed.clone(), beta.clone())))
887            }
888            QuoteDeepen(_, pos) => {
889                let sub_parsed = self.find_wanted(chart, done_tok)?.c_parse(chart, done_tok)?;
890                Ok(sub_parsed.with_c(ast::QuoteMore(sub_parsed.clone(), pos)))
891            }
892            QuoteEscape(_, depth) => {
893                let sub_parsed = self.find_wanted(chart, done_tok)?.c_parse(chart, done_tok)?;
894                Ok(sub_parsed.with_c(ast::QuoteLess(sub_parsed.clone(), depth)))
895            }
896        };
897        log!(">>>{:?}<<<\n", res);
898
899        res
900    }
901}
902
903type ParseResult = Result<Ast, ParseError>;
904
905#[derive(PartialEq, Eq, Debug, Clone)]
906pub struct ParseError {
907    pub msg: String,
908}
909
910pub fn parse(rule: &FormPat, pc: ParseContext, toks: &str) -> ParseResult {
911    // `parse` gets called recursively, so stash our mutable state:
912    let orig_best_token = best_token.with(|bt| {
913        let orig = bt.borrow().clone();
914        *bt.borrow_mut() = (0, Rc::new(rule.clone()), 0);
915        orig
916    });
917
918    let file_id = files.with(|f| f.borrow_mut().add("[filename]".to_string(), toks.to_string()));
919
920    let (start_but_startier, chart) = create_chart(Rc::new(rule.clone()), pc, toks, file_id);
921    let final_item = chart[chart.len() - 1].iter().find(|item| {
922        (*item.wanted_by.borrow()).iter().any(|idr| start_but_startier.is(*idr))
923            && *item.done.borrow()
924    });
925    log!("\n-------\n");
926    let result = match final_item {
927        Some(i) => i.c_parse(&chart, chart.len() - 1),
928        None => best_token.with(|bt| {
929            let (idx, ref grammar, pos) = *bt.borrow();
930
931            let line_begin = toks[0..idx].rfind('\n').map(|n| n + 1).unwrap_or(0);
932            let line_end = toks[idx..toks.len()].find('\n').map(|n| n + idx).unwrap_or(toks.len());
933            let line_number = toks[0..idx].matches('\n').count() + 1;
934
935            Err(ParseError {
936                msg: format!(
937                    "Could not parse past “{}•{}” (on line {}) \nin rule {}",
938                    &toks[line_begin..idx],
939                    &toks[idx..line_end],
940                    line_number,
941                    grammar.mark_up(Some(pos)),
942                ),
943            })
944        }),
945    };
946
947    best_token.with(|bt| *bt.borrow_mut() = orig_best_token); // unstash
948
949    result
950}
951
952pub fn parse_in_syn_env(rule: &FormPat, syn_env: SynEnv, toks: &str) -> ParseResult {
953    parse(rule, ParseContext::new_from_grammar(syn_env), toks)
954}
955
956fn parse_top(rule: &FormPat, toks: &str) -> ParseResult {
957    parse(rule, ParseContext::new_from_grammar(Assoc::new()), toks)
958}
959
960#[test]
961fn earley_merging() {
962    let one_rule = crate::grammar::new_scan("whatever", None);
963    let another_rule = Impossible;
964    let main_grammar = assoc_n!("a" => Rc::new(form_pat!((scan "irrelevant"))));
965    let another_grammar = assoc_n!("a" => Rc::new(form_pat!((scan "irrelevant"))));
966    let mut state_set = vec![];
967
968    let basic_item = Item {
969        start_idx: 0,
970        rule: Rc::new(one_rule),
971        pos: 0,
972        pc: Rc::new(ParseContext::new_from_grammar(main_grammar.clone())),
973        common: false,
974        file_id: 0,
975        id: get_next_id(),
976        done: RefCell::new(false),
977        local_parse: RefCell::new(LocalParse::NothingYet),
978        wanted_by: Rc::new(RefCell::new(vec![])),
979    };
980
981    assert_eq!(merge_into_state_set(basic_item.clone(), &mut state_set), true);
982    assert_eq!(state_set.len(), 1);
983
984    // exactly the same (except a different ID)
985    assert_eq!(merge_into_state_set(basic_item.clone(), &mut state_set), false);
986    assert_eq!(state_set.len(), 1);
987
988    // (not done yet)
989    assert_eq!(*state_set[0].done.borrow(), false);
990
991    // improvement in doneness, mergable
992    assert_eq!(
993        merge_into_state_set(
994            Item { done: RefCell::new(true), ..basic_item.clone() },
995            &mut state_set
996        ),
997        true
998    );
999    assert_eq!(state_set.len(), 1);
1000    // now done!
1001    assert_eq!(*state_set[0].done.borrow(), true);
1002
1003    // not an improvement this time!
1004    assert_eq!(
1005        merge_into_state_set(
1006            Item { done: RefCell::new(true), ..basic_item.clone() },
1007            &mut state_set
1008        ),
1009        false
1010    );
1011    assert_eq!(state_set.len(), 1);
1012
1013    // not as good as
1014    assert_eq!(
1015        merge_into_state_set(
1016            Item { done: RefCell::new(false), ..basic_item.clone() },
1017            &mut state_set
1018        ),
1019        false
1020    );
1021    assert_eq!(state_set.len(), 1);
1022    // still done!
1023    assert_eq!(*state_set[0].done.borrow(), true);
1024
1025    // different rule
1026    assert_eq!(
1027        merge_into_state_set(
1028            Item { rule: Rc::new(another_rule), ..basic_item.clone() },
1029            &mut state_set
1030        ),
1031        true
1032    );
1033    assert_eq!(state_set.len(), 2);
1034
1035    // different grammar (pointer-wise!)
1036    assert_eq!(
1037        merge_into_state_set(
1038            Item {
1039                pc: Rc::new((*basic_item.pc).clone().with_grammar(another_grammar.clone())),
1040                ..basic_item.clone()
1041            },
1042            &mut state_set
1043        ),
1044        true
1045    );
1046    assert_eq!(state_set.len(), 3);
1047
1048    let id1 = get_next_id().get_ref();
1049    let id2 = get_next_id().get_ref();
1050    let id3 = get_next_id().get_ref();
1051    // log!("AGA {:#?},{:#?},{:#?},{:#?}\n",
1052    // (*self.done.borrow() == *other.done.borrow() || !*other.done.borrow())
1053    // , *self.local_parse.borrow() == *other.local_parse.borrow()
1054    // , *(other.local_parse).borrow() == LocalParse::NothingYet
1055    // , (other.wanted_by.borrow().len() == 0
1056    // || (other.wanted_by.borrow().iter().all(
1057    // |w| self.wanted_by.borrow().iter().find(|s_w| w == *s_w).is_some())))
1058    //
1059    // );
1060    // log!("  {:#?}=?{:#?}\n", *self.local_parse.borrow(), *other.local_parse.borrow());
1061    let wanted_item = |ids| Item { wanted_by: Rc::new(RefCell::new(ids)), ..basic_item.clone() };
1062
1063    // test self-check (this shouldn't be interesting)
1064    assert_eq!(merge_into_state_set(wanted_item(vec![]), &mut state_set), false);
1065    assert_eq!(state_set.len(), 3);
1066
1067    assert_eq!(merge_into_state_set(wanted_item(vec![id1]), &mut state_set), true);
1068    assert_eq!(state_set.len(), 3);
1069
1070    // but another one doesn't have any effect
1071    assert_eq!(merge_into_state_set(wanted_item(vec![id1]), &mut state_set), false);
1072    assert_eq!(state_set.len(), 3);
1073
1074    assert_eq!(merge_into_state_set(wanted_item(vec![id2]), &mut state_set), true);
1075    assert_eq!(state_set.len(), 3);
1076
1077    assert_eq!(merge_into_state_set(wanted_item(vec![id1, id2]), &mut state_set), false);
1078    assert_eq!(state_set.len(), 3);
1079
1080    assert_eq!(merge_into_state_set(wanted_item(vec![id2, id1]), &mut state_set), false);
1081    assert_eq!(state_set.len(), 3);
1082
1083    assert_eq!(merge_into_state_set(wanted_item(vec![id2, id3]), &mut state_set), true);
1084    assert_eq!(state_set.len(), 3);
1085
1086    // TODO: we ought to test the NothingYet - JustifiedByItem() / ParsedAtom() - Ambiguous lattice
1087}
1088
1089#[test]
1090fn earley_simple_recognition() {
1091    let main_grammar = Assoc::new();
1092
1093    let atom = Rc::new(crate::grammar::new_scan(r"\s*(\S+)", None));
1094
1095    // 0-length strings
1096
1097    assert_eq!(recognize(&*atom, &main_grammar, tokens_s!()), false);
1098
1099    assert_eq!(recognize(&Anyways(ast!((trivial))), &main_grammar, tokens_s!()), true);
1100
1101    assert_eq!(recognize(&Seq(vec![]), &main_grammar, tokens_s!()), true);
1102
1103    assert_eq!(recognize(&Seq(vec![atom.clone()]), &main_grammar, tokens_s!()), false);
1104
1105    assert_eq!(recognize(&Star(Rc::new(Impossible)), &main_grammar, tokens_s!()), true);
1106
1107    assert_eq!(recognize(&Star(atom.clone()), &main_grammar, tokens_s!()), true);
1108
1109    // 1-length strings
1110
1111    assert_eq!(recognize(&*atom, &main_grammar, tokens_s!("Pierre_Menard")), true);
1112
1113    assert_eq!(recognize(&Impossible, &main_grammar, tokens_s!("Pierre_Menard")), false);
1114
1115    assert_eq!(
1116        recognize(
1117            &Literal(atom.clone(), n("Cervantes")),
1118            &main_grammar,
1119            tokens_s!("Pierre_Menard")
1120        ),
1121        false
1122    );
1123
1124    assert_eq!(
1125        recognize(&Literal(atom.clone(), n("Cervantes")), &main_grammar, tokens_s!("Cervantes")),
1126        true
1127    );
1128
1129    assert_eq!(recognize(&Seq(vec![atom.clone()]), &main_grammar, tokens_s!("P.M.")), true);
1130
1131    assert_eq!(recognize(&Star(atom.clone()), &main_grammar, tokens_s!("PM")), true);
1132
1133    assert_eq!(
1134        recognize(
1135            &Alt(vec![Rc::new(Impossible), atom.clone()]),
1136            &main_grammar,
1137            tokens_s!("Pierre_Menard")
1138        ),
1139        true
1140    );
1141
1142    assert_eq!(
1143        recognize(
1144            &Alt(vec![Rc::new(Impossible), Rc::new(Literal(atom.clone(), n("Cervantes")))]),
1145            &main_grammar,
1146            tokens_s!("Pierre_Menard")
1147        ),
1148        false
1149    );
1150
1151    assert_eq!(
1152        recognize(
1153            &Biased(Rc::new(Impossible), atom.clone()),
1154            &main_grammar,
1155            tokens_s!("Pierre_Menard")
1156        ),
1157        true
1158    );
1159
1160    assert_eq!(
1161        recognize(
1162            &Biased(atom.clone(), Rc::new(Impossible)),
1163            &main_grammar,
1164            tokens_s!("Pierre_Menard")
1165        ),
1166        true
1167    );
1168
1169    // Nesting!
1170
1171    assert_eq!(
1172        recognize(
1173            &Seq(vec![Rc::new(Seq(vec![Rc::new(Seq(vec![atom.clone()]))]))]),
1174            &main_grammar,
1175            tokens_s!("Frustrated_Novelist_No_Good_At_Describing_Hands")
1176        ),
1177        true
1178    );
1179
1180    assert_eq!(
1181        recognize(
1182            &Alt(vec![Rc::new(Alt(vec![Rc::new(Alt(vec![atom.clone()]))]))]),
1183            &main_grammar,
1184            tokens_s!("(no_pun_intended,_by_the_way)") // What pun?
1185        ),
1186        true
1187    );
1188
1189    assert_eq!(
1190        recognize(
1191            &Plus(Rc::new(Plus(Rc::new(Plus(atom.clone()))))),
1192            &main_grammar,
1193            tokens_s!("(except_I_might've_changed_it_otherwise)")
1194        ),
1195        true
1196    );
1197
1198    // Fine, there are longer strings.
1199
1200    assert_eq!(
1201        recognize(
1202            &Seq(vec![atom.clone(), atom.clone(), atom.clone()]),
1203            &main_grammar,
1204            tokens_s!("Author" "of_the" "Quixote")
1205        ),
1206        true
1207    );
1208
1209    assert_eq!(
1210        recognize(
1211            &Seq(vec![atom.clone(), atom.clone(), atom.clone()]),
1212            &main_grammar,
1213            tokens_s!("Author" "of" "the" "Quixote")
1214        ),
1215        false
1216    );
1217
1218    assert_eq!(
1219        recognize(
1220            &Seq(vec![atom.clone(), atom.clone(), atom.clone()]),
1221            &main_grammar,
1222            tokens_s!("Author_of" "the_Quixote")
1223        ),
1224        false
1225    );
1226
1227    assert_eq!(
1228        recognize(
1229            &Plus(Rc::new(Plus(Rc::new(Plus(atom.clone()))))),
1230            &main_grammar,
1231            tokens_s!("Author" "of" "the" "Quixote")
1232        ),
1233        true
1234    );
1235}
1236
1237#[test]
1238fn earley_env_recognition() {
1239    fn mk_lt(s: &str) -> Rc<FormPat> {
1240        Rc::new(Literal(Rc::new(crate::grammar::new_scan(r"\s*(\S+)", None)), n(s)))
1241    }
1242    let env = assoc_n!(
1243        "empty" => Rc::new(Seq(vec![])),
1244        "empty_indirect" => Rc::new(Call(n("empty"))),
1245        "impossible" => Rc::new(Impossible),
1246        "a" => mk_lt("a"),
1247        "aaa" => Rc::new(Star(mk_lt("a"))),
1248        "aaa_indirect" => Rc::new(Star(Rc::new(Call(n("a"))))),
1249        "aaaa" => Rc::new(Plus(mk_lt("a"))),
1250        "xxx" => Rc::new(Star(mk_lt("x"))),
1251        "l_rec_axxx" => Rc::new(Alt(vec![Rc::new(Seq(vec![Rc::new(Call(n("l_rec_axxx"))),
1252                                                          mk_lt("x")])),
1253                                         mk_lt("a")])),
1254        "l_rec_axxx_hard" => Rc::new(
1255            Alt(vec![Rc::new(Seq(vec![Rc::new(Call(n("l_rec_axxx_hard"))),
1256                                      Rc::new(Alt(vec![mk_lt("x"),
1257                                                       Rc::new(Call(n("impossible"))),
1258                                                       Rc::new(Call(n("empty_indirect")))]))])),
1259                                      Rc::new(Call(n("impossible"))),
1260                                      mk_lt("a")])),
1261        "r_rec_xxxa" => Rc::new(Alt(vec![Rc::new(Seq(vec![mk_lt("x"),
1262                                                  Rc::new(Call(n("r_rec_xxxa")))])),
1263                                 mk_lt("a")])),
1264        "l_rec_aaaa" => Rc::new(Alt(vec![Rc::new(Seq(vec![Rc::new(Call(n("l_rec_aaaa"))),
1265                                                          mk_lt("a")])),
1266                                 mk_lt("a")])),
1267        "r_rec_aaaa" => Rc::new(Alt(vec![Rc::new(Seq(vec![mk_lt("a"),
1268                                                          Rc::new(Call(n("r_rec_aaaa")))])),
1269                                 mk_lt("a")])));
1270
1271    assert_eq!(recognize(&*mk_lt("a"), &env, tokens_s!("a")), true);
1272    assert_eq!(recognize(&*mk_lt("a"), &env, tokens_s!("b")), false);
1273
1274    assert_eq!(recognize(&Call(n("empty")), &env, tokens_s!()), true);
1275    assert_eq!(recognize(&Call(n("empty")), &env, tokens_s!("not empty!")), false);
1276
1277    assert_eq!(recognize(&Call(n("empty_indirect")), &env, tokens_s!()), true);
1278    assert_eq!(recognize(&Call(n("empty_indirect")), &env, tokens_s!("not empty!")), false);
1279
1280    assert_eq!(recognize(&Call(n("aaa")), &env, tokens_s!()), true);
1281    assert_eq!(recognize(&Call(n("aaa")), &env, tokens_s!("a")), true);
1282    assert_eq!(recognize(&Call(n("aaa")), &env, tokens_s!("a" "a")), true);
1283    assert_eq!(recognize(&Call(n("aaa")), &env, tokens_s!("a" "a" "a")), true);
1284    assert_eq!(recognize(&Call(n("aaa")), &env, tokens_s!("a" "x" "a")), false);
1285
1286    assert_eq!(recognize(&Call(n("aaaa")), &env, tokens_s!()), false);
1287    assert_eq!(recognize(&Call(n("aaaa")), &env, tokens_s!("a")), true);
1288    assert_eq!(recognize(&Call(n("aaaa")), &env, tokens_s!("a" "a")), true);
1289    assert_eq!(recognize(&Call(n("aaaa")), &env, tokens_s!("a" "a" "a")), true);
1290    assert_eq!(recognize(&Call(n("aaaa")), &env, tokens_s!("a" "x" "a")), false);
1291
1292    assert_eq!(recognize(&Call(n("aaa_indirect")), &env, tokens_s!()), true);
1293    assert_eq!(recognize(&Call(n("aaa_indirect")), &env, tokens_s!("a")), true);
1294    assert_eq!(recognize(&Call(n("aaa_indirect")), &env, tokens_s!("a" "a")), true);
1295    assert_eq!(recognize(&Call(n("aaa_indirect")), &env, tokens_s!("a" "a" "a")), true);
1296    assert_eq!(recognize(&Call(n("aaa_indirect")), &env, tokens_s!("a" "x" "a")), false);
1297
1298    for l_rec in vec!["l_rec_axxx", "l_rec_axxx_hard"] {
1299        assert_eq!(recognize(&Call(n(l_rec)), &env, tokens_s!("a")), true);
1300        assert_eq!(recognize(&Call(n(l_rec)), &env, tokens_s!("a" "x")), true);
1301        assert_eq!(recognize(&Call(n(l_rec)), &env, tokens_s!("a" "x" "x")), true);
1302        assert_eq!(recognize(&Call(n(l_rec)), &env, tokens_s!("a" "x" "x" "x")), true);
1303        assert_eq!(recognize(&Call(n(l_rec)), &env, tokens_s!("a" "a" "x" "x")), false);
1304        assert_eq!(recognize(&Call(n(l_rec)), &env, tokens_s!()), false);
1305        assert_eq!(recognize(&Call(n(l_rec)), &env, tokens_s!("a" "x" "a" "x")), false);
1306    }
1307}
1308
1309#[test]
1310fn basic_parsing_e() {
1311    fn mk_lt(s: &str) -> Rc<FormPat> {
1312        Rc::new(Literal(Rc::new(crate::grammar::new_scan(r"\s*(\S+)", None)), n(s)))
1313    }
1314
1315    let atom = Rc::new(crate::grammar::new_scan(r"\s*(\S+)", None));
1316
1317    assert_eq!(parse_top(&*atom, tokens_s!("asdf")), Ok(ast!("asdf")));
1318
1319    assert_eq!(parse_top(&Anyways(ast!("asdf")), tokens_s!()), Ok(ast!("asdf")));
1320
1321    assert_eq!(
1322        parse_top(&Biased(Rc::new(Anyways(ast!("A"))), Rc::new(Anyways(ast!("B")))), tokens_s!()),
1323        Ok(ast!("A"))
1324    );
1325
1326    assert_eq!(
1327        parse_top(&Named(n("c"), Rc::new(Anyways(ast!("asdf")))), tokens_s!()),
1328        Ok(ast!({- "c" => "asdf"}))
1329    );
1330
1331    assert_eq!(parse_top(&Seq(vec![atom.clone()]), tokens_s!("asdf")), Ok(ast_shape!("asdf")));
1332
1333    assert_eq!(
1334        parse_top(
1335            &Seq(vec![atom.clone(), mk_lt("fork"), atom.clone()]),
1336            tokens_s!("asdf" "fork" "asdf")
1337        ),
1338        Ok(ast_shape!("asdf" "fork" "asdf"))
1339    );
1340
1341    assert_eq!(
1342        parse_top(
1343            &Seq(vec![
1344                Rc::new(Seq(vec![mk_lt("aa"), mk_lt("ab")])),
1345                Rc::new(Seq(vec![mk_lt("ba"), mk_lt("bb")]))
1346            ]),
1347            tokens_s!("aa" "ab" "ba" "bb")
1348        ),
1349        Ok(ast_shape!(("aa" "ab") ("ba" "bb")))
1350    );
1351
1352    assert!(!parse_top(
1353        &Seq(vec![atom.clone(), mk_lt("fork"), atom.clone()]),
1354        tokens_s!("asdf" "knife" "asdf"),
1355    )
1356    .is_ok());
1357
1358    assert_eq!(
1359        parse_top(
1360            &form_pat!([(star (named "c", (alt (lit_aat "X"), (lit_aat "O")))), (lit_aat "!")]),
1361            tokens_s!("X" "O" "O" "O" "X" "X" "!")
1362        ),
1363        Ok(ast_shape!({- "c" => ["X", "O", "O", "O", "X", "X"]} "!"))
1364    );
1365
1366    assert_eq!(
1367        parse_top(
1368            &Seq(vec![Rc::new(Star(Rc::new(Named(n("c"), mk_lt("*"))))), mk_lt("X")]),
1369            tokens_s!("*" "*" "*" "*" "*" "X")
1370        ),
1371        Ok(ast_shape!({- "c" => ["*", "*", "*", "*", "*"] } "X"))
1372    );
1373
1374    assert_m!(
1375        parse_top(
1376            &form_pat!([(star (biased (lit_aat "a"), (lit_aat "b"))), (lit_aat "b")]),
1377            tokens_s!["a" "a" "b"]
1378        ),
1379        Ok(_)
1380    );
1381
1382    assert_m!(
1383        parse_top(
1384            &form_pat!([(star (biased (lit_aat "b"), (lit_aat "a"))), (lit_aat "b")]),
1385            tokens_s!["a" "a" "b"]
1386        ),
1387        Ok(_)
1388    );
1389
1390    assert_eq!(parse_top(&form_pat!([(lit_aat "b")]), tokens_s!["b"]), Ok(ast_shape!("b")));
1391
1392    assert_eq!(parse_top(&form_pat!(varref_aat), tokens_s!("Spoon")), Ok(ast!((vr "Spoon"))));
1393
1394    assert_eq!(
1395        parse_top(&form_pat!((reserved varref_aat, "Moon")), tokens_s!("Spoon")),
1396        Ok(ast!((vr "Spoon")))
1397    );
1398
1399    assert_eq!(
1400        parse_top(
1401            &form_pat!((alt (lit_aat "Moon"), (reserved varref_aat, "Moon"))),
1402            tokens_s!("Spoon")
1403        ),
1404        Ok(ast!((vr "Spoon")))
1405    );
1406
1407    let env = assoc_n!(
1408        "DefaultToken" => atom.clone()
1409    );
1410
1411    assert_eq!(
1412        parse_in_syn_env(
1413            &form_pat!((alt (lit_aat "Moon"), (reserved (scan r"\s*(\S+)"), "Moon"))),
1414            env.clone(),
1415            tokens_s!("Moon")
1416        ),
1417        Ok(ast!("Moon")) // Not a varref, so it's the literal instead
1418    );
1419
1420    assert_eq!(
1421        // `lit` calls "DefaultToken"
1422        parse_in_syn_env(&form_pat!((lit "Moon")), env.clone(), tokens_s!("Moon")),
1423        Ok(ast!("Moon"))
1424    );
1425}
1426
1427#[test]
1428fn ambiguity() {
1429    let no_env = Assoc::new();
1430    // TODO: prevent an infinite loop in this case:
1431    let _star_star = form_pat![(star (scan "a*"))];
1432
1433    let a_vs_a = form_pat![(alt (scan "a"), (scan "a"))];
1434
1435    assert_eq!(recognize(&a_vs_a, &no_env, "a"), true);
1436    assert_m!(parse_top(&a_vs_a, ""), Err(_));
1437
1438    let a_vs_aa__twice = form_pat![[(alt (scan "a"), (scan "aa")),
1439                                    (alt (scan "a"), (scan "aa"))]];
1440
1441    assert_eq!(recognize(&a_vs_aa__twice, &no_env, "aaa"), true);
1442    assert_m!(parse_top(&a_vs_aa__twice, "aaa"), Err(_));
1443
1444    assert_eq!(recognize(&a_vs_aa__twice, &no_env, "aaaa"), true);
1445    assert_m!(parse_top(&a_vs_aa__twice, "aaaa"), Ok(_)); // Shape of trivials. Sure.
1446}
libunseemly/earley.rs

libunseemly/
earley.rs