Skip to main content

tecta_peg/
lib.rs

1use std::{
2    collections::BTreeMap,
3    fmt::{Debug, Display},
4};
5
6use tecta_lex::{
7    Delimiter as GroupDelimiter, Span, Spanned, SpanningChars, pat_ident_body, pat_ident_start,
8    pat_punct,
9};
10
11/// A delimiter of a group in a token tree, or a sequence.
12#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
13pub enum AnyDelimiter {
14    /// `()`
15    Parenthesis,
16    /// `[]`
17    Bracket,
18    /// `{}`
19    Brace,
20    /// `<>`,
21    AngleBrackets,
22}
23impl AnyDelimiter {
24    pub fn to_group(&self) -> Option<GroupDelimiter> {
25        match self {
26            Self::Parenthesis => Some(GroupDelimiter::Parenthesis),
27            Self::Bracket => Some(GroupDelimiter::Bracket),
28            Self::Brace => Some(GroupDelimiter::Brace),
29            Self::AngleBrackets => None,
30        }
31    }
32    pub fn opener(&self) -> char {
33        match self {
34            Self::Parenthesis => '(',
35            Self::Bracket => '[',
36            Self::Brace => '{',
37            Self::AngleBrackets => '<',
38        }
39    }
40    pub fn closer(&self) -> char {
41        match self {
42            Self::Parenthesis => ')',
43            Self::Bracket => ']',
44            Self::Brace => '}',
45            Self::AngleBrackets => '>',
46        }
47    }
48}
49impl From<GroupDelimiter> for AnyDelimiter {
50    fn from(value: GroupDelimiter) -> Self {
51        match value {
52            GroupDelimiter::Parenthesis => Self::Parenthesis,
53            GroupDelimiter::Bracket => Self::Bracket,
54            GroupDelimiter::Brace => Self::Brace,
55        }
56    }
57}
58
59/// A transient control rule; cleared by other control characters.
60#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
61pub struct Control(pub ControlKind, pub Span);
62
63/// A specific variant of control rule.
64#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
65pub enum ControlKind {
66    /// Start of a sequence rule, beginning with `<` and ending with `>`.
67    SequenceStart,
68    /// Start of a group rule, beginning with one of `(`, `[`, or `{`, and ending with, respectively, `)`, `]`, or `}`.
69    GroupStart(GroupDelimiter),
70}
71
/// The minimum number of matches a repetition requires.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum AtLeast {
    /// Zero or more matches. Written `*`.
    Zero,
    /// One or more matches. Written `+`.
    One,
}
impl Display for AtLeast {
    /// Writes the operator character that denotes this repetition (`*` or `+`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let symbol = match self {
            AtLeast::Zero => "*",
            AtLeast::One => "+",
        };
        f.write_str(symbol)
    }
}
88
89/// Repeats a rule a number of times, the repetition choice being decided by the operator used:
90/// - `*` repeats zero or more times
91/// - `+` repeats one or more times
92///
93/// The first operand is the element and the second is the separator.
94/// For example, `"x" ',' *` matches multiple instances of the keyword `x`, separated by commas.
95/// Trailing is enabled with the `~` modifier.
96#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
97pub struct RepeatRule {
98    pub element: Box<Rule>,
99    pub separator: Box<Rule>,
100    pub at_least: AtLeast,
101    pub allow_trailing: bool,
102}
103
104/// A grammar rule.
105#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
106pub struct Rule(pub RuleKind, pub Span);
107impl Rule {
108    pub fn sequence(rules: Vec<Rule>, span: Span) -> Self {
109        Self(RuleKind::Sequence(rules), span)
110    }
111    pub fn record(rules: Vec<(Spanned<Option<String>>, Rule)>, span: Span) -> Self {
112        Self(RuleKind::Record(rules), span)
113    }
114
115    pub fn choice(rules: Vec<Rule>, span: Span) -> Self {
116        Self(RuleKind::Choice(rules), span)
117    }
118    pub fn named_choice(rules: Vec<(Spanned<String>, Rule)>, span: Span) -> Self {
119        Self(RuleKind::NamedChoice(rules), span)
120    }
121
122    pub fn group(delimiter: GroupDelimiter, rule: Rule, span: Span) -> Self {
123        Self(RuleKind::Group(delimiter, Box::new(rule)), span)
124    }
125
126    pub fn repeat(element: Rule, separator: Rule, at_least: AtLeast, span: Span) -> Self {
127        Self(
128            RuleKind::Repeat(RepeatRule {
129                element: Box::new(element),
130                separator: Box::new(separator),
131                at_least,
132                allow_trailing: false,
133            }),
134            span,
135        )
136    }
137    pub fn optional(rule: Rule, span: Span) -> Self {
138        Self(RuleKind::Optional(Box::new(rule)), span)
139    }
140
141    pub fn punctuation(repr: String, span: Span) -> Self {
142        Self(RuleKind::Punctuation(repr), span)
143    }
144    pub fn keyword(repr: String, span: Span) -> Self {
145        Self(RuleKind::Keyword(repr), span)
146    }
147    pub fn other(repr: String, span: Span) -> Self {
148        Self(RuleKind::Other(repr), span)
149    }
150    pub fn builtin(repr: String, span: Span) -> Self {
151        Self(RuleKind::Builtin(repr), span)
152    }
153
154    pub fn peg(self) -> Peg {
155        Peg::Rule(self)
156    }
157}
158impl Debug for Rule {
159    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
160        write!(f, "{:?} @{}", self.0, self.1)
161    }
162}
163impl Display for Rule {
164    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
165        write!(f, "{}", self.0)
166    }
167}
168
169/// A specific grammar rule variant.
170#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
171pub enum RuleKind {
172    /// Matches a sequence of rules, one after another.
173    Sequence(Vec<Rule>),
174    /// Like a sequence, but every rule can have a name.
175    Record(Vec<(Spanned<Option<String>>, Rule)>),
176
177    /// Begins all rules at the same point, using the first one that matches.
178    Choice(Vec<Rule>),
179    /// Like a choice, but every rule can have a name.
180    NamedChoice(Vec<(Spanned<String>, Rule)>),
181
182    /// Matches inside a token group.
183    Group(GroupDelimiter, Box<Rule>),
184
185    /// A [repeating rule][`RepeatRule`], delimited with `*` or `+`.
186    Repeat(RepeatRule),
187
188    /// Makes a rule optional (allowed to fail). Delimited with `?`.
189    Optional(Box<Rule>),
190
191    /// Matches a punctuation token.
192    Punctuation(String),
193    /// Matches a keyword token.
194    Keyword(String),
195
196    /// Matches a different rule.
197    Other(String),
198    /// Matches a built-in rule (denoted with `@`).
199    Builtin(String),
200}
201impl RuleKind {
202    pub fn with(self, span: impl Into<Span>) -> Rule {
203        Rule(self, span.into())
204    }
205}
206impl Display for RuleKind {
207    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
208        match self {
209            RuleKind::Sequence(rules) => match &rules[..] {
210                [] => write!(f, "<>"),
211                [most @ .., last] => {
212                    write!(f, "<")?;
213                    for rule in most {
214                        write!(f, "{rule} ")?;
215                    }
216                    write!(f, "{last}>")
217                }
218            },
219            RuleKind::Record(fields) => {
220                write!(f, "&{{ ")?;
221                for (Spanned(_, name), rule) in fields {
222                    let name = match name {
223                        Some(name) => name,
224                        None => "!",
225                    };
226                    write!(f, "{name}: {rule}; ")?;
227                }
228                write!(f, "}}")
229            }
230
231            RuleKind::Choice(rules) => match &rules[..] {
232                [] => write!(f, "!"),
233                [most @ .., last] => {
234                    for rule in most {
235                        write!(f, "{rule} | ")?;
236                    }
237                    write!(f, "{last}")
238                }
239            },
240            RuleKind::NamedChoice(rules) => {
241                write!(f, "&[ ")?;
242                for (Spanned(_, name), rule) in rules {
243                    write!(f, "{name}: {rule}; ")?;
244                }
245                write!(f, "]")
246            }
247
248            RuleKind::Group(delimiter, inner) => {
249                write!(f, "{}{}{}", delimiter.opener(), inner, delimiter.closer())
250            }
251            RuleKind::Repeat(RepeatRule {
252                element,
253                separator,
254                at_least,
255                allow_trailing,
256            }) => write!(
257                f,
258                "{} {} {}{}",
259                element,
260                separator,
261                at_least,
262                if *allow_trailing { "~" } else { "" }
263            ),
264            RuleKind::Optional(rule) => write!(f, "{rule}?"),
265            RuleKind::Punctuation(punct) => write!(f, "'{punct}'"),
266            RuleKind::Keyword(kw) => write!(f, "\"{kw}\""),
267            RuleKind::Other(name) => write!(f, "{name}"),
268            RuleKind::Builtin(builtin) => write!(f, "@{builtin}"),
269        }
270    }
271}
272
273/// An element of a PEG grammar stack.
274#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
275pub enum Peg {
276    /// A control element. Should not appear on the stack by the end of parsing.
277    Control(Control),
278    /// A grammar rule element.
279    Rule(Rule),
280}
281impl Peg {
282    pub fn sequence_start(span: Span) -> Self {
283        Self::Control(Control(ControlKind::SequenceStart, span))
284    }
285    pub fn group_start(delimiter: GroupDelimiter, span: Span) -> Self {
286        Self::Control(Control(ControlKind::GroupStart(delimiter), span))
287    }
288
289    pub fn try_as_rule(self) -> Result<Rule> {
290        match self {
291            Peg::Rule(rule) => Ok(rule),
292            Peg::Control(control) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
293        }
294    }
295    pub fn try_as_mut_rule(&mut self) -> Result<&mut Rule> {
296        match self {
297            Peg::Rule(rule) => Ok(rule),
298            Peg::Control(control) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
299        }
300    }
301    pub fn span(&self) -> Span {
302        let (Peg::Control(Control(_, span)) | Peg::Rule(Rule(_, span))) = self;
303        *span
304    }
305}
306
307/// A PEG grammar stack.
308pub struct PegStack(pub Vec<Peg>);
309impl PegStack {
310    pub fn raw_pop_rule(&mut self, operator_span: Span) -> Result<Rule> {
311        match self.0.pop() {
312            Some(Peg::Rule(rule)) => Ok(rule),
313            Some(Peg::Control(control)) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
314            None => Err(ErrorKind::StackEmpty("expected rule".into()).with(operator_span)),
315        }
316    }
317    pub fn take_rule(&mut self, operator_span: Span) -> Result<Rule> {
318        match self.0.pop() {
319            Some(Peg::Rule(Rule(RuleKind::Choice(mut choices), span))) if !choices.is_empty() => {
320                let rule = choices
321                    .pop()
322                    .expect("internal parser error: choices was in fact empty");
323                self.0
324                    .push(Peg::Rule(Rule(RuleKind::Choice(choices), span)));
325                Ok(rule)
326            }
327            Some(Peg::Rule(other_rule)) => Ok(other_rule),
328            Some(Peg::Control(control)) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
329            None => Err(ErrorKind::StackEmpty("expected rule".into()).with(operator_span)),
330        }
331    }
332    pub fn add_rule(&mut self, rule: Rule) {
333        match self.0.pop() {
334            Some(Peg::Rule(Rule(RuleKind::Choice(mut variants), span))) => {
335                if let Some(old_last_variant) = variants.pop() {
336                    let total_span = old_last_variant.1 + span;
337                    if let RuleKind::Sequence(mut sequence) = old_last_variant.0 {
338                        sequence.push(rule);
339                        variants.push(Rule(RuleKind::Sequence(sequence), total_span));
340                    } else {
341                        variants.push(Rule::sequence(vec![old_last_variant, rule], total_span));
342                    }
343                } else {
344                    variants.push(rule);
345                }
346                self.0.push(Rule::choice(variants, span).peg());
347            }
348            Some(other) => {
349                self.0.push(other);
350                self.0.push(rule.peg());
351            }
352            None => {
353                self.0.push(rule.peg());
354            }
355        }
356    }
357    pub fn add_rule_kind(&mut self, kind: RuleKind, span: Span) {
358        self.add_rule(Rule(kind, span));
359    }
360}
361
362/// Input to a PEG parsing function.
363#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
364pub struct ParseInput<I: Iterator<Item = Spanned<char>>> {
365    chars: I,
366    end_span: Span,
367}
368impl<I: Iterator<Item = Spanned<char>>> ParseInput<I> {
369    pub fn new(chars: I, end_span: Span) -> Self {
370        Self { chars, end_span }
371    }
372    pub fn current_span(&self) -> Span
373    where
374        I: Clone,
375    {
376        match self.chars.clone().next() {
377            Some(Spanned(span, _)) => span,
378            None => self.end_span,
379        }
380    }
381    fn eof(&self) -> Error {
382        Error::eof(self.end_span)
383    }
384}
385impl<I: Iterator<Item = Spanned<char>>> Iterator for ParseInput<I> {
386    type Item = Spanned<char>;
387    fn next(&mut self) -> Option<Self::Item> {
388        self.chars.next()
389    }
390}
391
// Shorthand for the input type taken by the parsing functions: a `ParseInput`
// over any cloneable iterator of spanned characters. Cloneability is what lets
// those functions peek ahead via `input.clone().next()`.
macro_rules! parse_input {
    () => { ParseInput<impl Iterator<Item = Spanned<char>> + Clone> };
}
397
398#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
399enum ErrorKind {
400    ExpectedFound(String, String),
401    StackEmpty(String),
402    StrayControl(ControlKind),
403    EOF,
404    InvalidCloser {
405        expected: AnyDelimiter,
406        got: AnyDelimiter,
407    },
408    ExistingPreamble(String),
409}
410impl ErrorKind {
411    fn with(self, span: impl Into<Span>) -> Error {
412        Error(self, span.into())
413    }
414}
415impl Display for ErrorKind {
416    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
417        match self {
418            ErrorKind::ExpectedFound(expected, found) => {
419                write!(f, "expected {expected}, found {found}")
420            }
421            ErrorKind::StackEmpty(expected) => write!(f, "stack empty; {expected}"),
422            ErrorKind::StrayControl(control) => {
423                write!(f, "expected a rule, got a control ({control:?})")
424            }
425            ErrorKind::EOF => write!(f, "unexpected end of file"),
426            ErrorKind::InvalidCloser { expected, got } => write!(
427                f,
428                "expected {} to match {}, got {}",
429                expected.closer(),
430                expected.opener(),
431                got.closer()
432            ),
433            ErrorKind::ExistingPreamble(preamble) => {
434                write!(f, "preamble #{preamble} already exists")
435            }
436        }
437    }
438}
439impl core::error::Error for ErrorKind {}
440
441/// The error type.
442#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
443pub struct Error(ErrorKind, Span);
444impl Error {
445    pub fn span(&self) -> Span {
446        self.1
447    }
448    fn eof(end_span: Span) -> Self {
449        ErrorKind::EOF.with(end_span)
450    }
451}
452impl Display for Error {
453    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
454        write!(f, "{} (at {})", self.0, self.1)
455    }
456}
457impl core::error::Error for Error {}
458pub type Result<T> = core::result::Result<T, Error>;
459
460/// Skips over as many whitespace characters as possible.
461///
462/// Used between many parsing functions.
463pub fn skip_ws(input: &mut parse_input!()) {
464    while let Some(Spanned(_, ch)) = input.clone().next() {
465        if !ch.is_whitespace() {
466            return;
467        }
468        input.next();
469    }
470}
471
472/// Attempts to parse an identifier. If already at EOF, `Ok(None)` is returned.
473pub fn expect_identifier(input: &mut parse_input!()) -> Result<Option<Spanned<String>>> {
474    let (mut span, ch) = match input.next() {
475        Some(Spanned(span, ch @ pat_ident_start!())) => (span, ch),
476        Some(Spanned(span, other)) => {
477            return Err(ErrorKind::ExpectedFound(
478                "identifier".to_owned(),
479                format!("character `{other}`"),
480            )
481            .with(span));
482        }
483        None => return Ok(None),
484    };
485    let mut output = String::from(ch);
486    consume_identifier_rest(input, &mut span, &mut output);
487    Ok(Some(Spanned(span, output)))
488}
489
490/// Similar to `expect_identifier`, but errors on EOF.
491pub fn identifier_or_eof(input: &mut parse_input!()) -> Result<Spanned<String>> {
492    expect_identifier(input)?.ok_or(input.eof())
493}
494
495/// After parsing the first character of an identifier, this can be used to parse the rest of the characters.
496pub fn consume_identifier_rest(
497    input: &mut parse_input!(),
498    into_span: &mut Span,
499    into: &mut String,
500) {
501    while let Some(Spanned(span, ch @ pat_ident_body!())) = input.clone().next() {
502        input.next();
503        *into_span += span;
504        into.push(ch);
505    }
506}
507
508/// Expects some literal text content to appear in the character stream.
509pub fn expect_exactly(input: &mut parse_input!(), string: &str) -> Result<Span> {
510    let mut collected = String::new();
511    let mut acc_span = input.current_span();
512    for ch in string.chars() {
513        let next = input.next();
514        if let Some(Spanned(span, test_ch)) = next {
515            acc_span += span;
516            collected.push(test_ch);
517            if test_ch != ch {
518                return Err(ErrorKind::ExpectedFound(
519                    format!("`{string}`"),
520                    format!("`{collected}`"),
521                )
522                .with(span));
523            }
524        } else {
525            return Err(input.eof());
526        }
527    }
528    Ok(acc_span)
529}
530
531/// Parses a list of record or named choice fields.
532pub fn parse_fields(
533    input: &mut parse_input!(),
534    terminator: char,
535) -> Result<Spanned<Vec<(Spanned<Option<String>>, Rule)>>> {
536    let mut record_fields = vec![];
537    let mut total_span = input.current_span();
538    loop {
539        skip_ws(input);
540        if let Some(Spanned(span, ch)) = input.clone().next()
541            && ch == terminator
542        {
543            total_span += span;
544            input.next();
545            break;
546        }
547
548        let identifier = if let Some(Spanned(span, '!')) = input.clone().next() {
549            input.next();
550            Spanned(span, None)
551        } else {
552            let identifier = expect_identifier(input)?.ok_or(input.eof())?;
553            Spanned(identifier.0, Some(identifier.1))
554        };
555        total_span += identifier.0;
556
557        skip_ws(input);
558        total_span += expect_exactly(input, ":")?;
559
560        skip_ws(input);
561        let start = input.current_span();
562        let mut rule_stack = PegStack(vec![]);
563        while let RuleStatus::Continue = next_rule(input, &mut rule_stack)? {
564            skip_ws(input);
565        }
566
567        let mut rules = rule_stack
568            .0
569            .into_iter()
570            .map(|rule| rule.try_as_rule())
571            .collect::<Result<Vec<_>>>()?;
572        let rule_span = rules.iter().map(|rule| rule.1).fold(start, |a, b| a + b);
573        total_span += rule_span;
574
575        record_fields.push((
576            identifier,
577            if rules.len() == 1 {
578                rules.pop().unwrap()
579            } else {
580                Rule::sequence(rules, rule_span)
581            },
582        ));
583    }
584    Ok(Spanned(total_span, record_fields))
585}
586
/// Tells the caller of [`next_rule`] whether to keep parsing rules.
pub enum RuleStatus {
    /// More rules may follow; keep calling.
    Continue,
    /// The rule terminator (`;`) was reached; stop.
    End,
}
592
593/// Parses the next rule, modifying the stack accordingly. Returns whether or not rule parsing should continue.
594pub fn next_rule(input: &mut parse_input!(), stack: &mut PegStack) -> Result<RuleStatus> {
595    let Spanned(mut peg_span, ch) = input.next().ok_or(input.eof())?;
596    match ch {
597        '"' => {
598            let mut keyword = String::new();
599            loop {
600                let Spanned(span, ch) = input.next().ok_or(input.eof())?;
601                peg_span += span;
602                if ch == '"' {
603                    break;
604                }
605                keyword.push(ch);
606            }
607            stack.add_rule(Rule::keyword(keyword, peg_span));
608        }
609
610        '\'' => {
611            let mut punct = String::new();
612            loop {
613                let Spanned(span, ch) = input.next().ok_or(input.eof())?;
614                if ch == '\'' {
615                    peg_span += span;
616                    break;
617                }
618                if !matches!(ch, pat_punct!()) {
619                    return Err(ErrorKind::ExpectedFound(
620                        "punctuation".to_owned(),
621                        format!("character `{ch}`"),
622                    )
623                    .with(span));
624                }
625                peg_span += span;
626                punct.push(ch);
627            }
628            stack.add_rule(Rule::punctuation(punct, peg_span));
629        }
630
631        '*' => {
632            let separator = stack.raw_pop_rule(peg_span)?;
633            let element = stack.take_rule(peg_span)?;
634            let other_span = element.1 + separator.1;
635            stack.add_rule(Rule::repeat(
636                element,
637                separator,
638                AtLeast::Zero,
639                peg_span + other_span,
640            ));
641        }
642        '+' => {
643            let separator = stack.raw_pop_rule(peg_span)?;
644            let element = stack.take_rule(peg_span)?;
645            let other_span = element.1 + separator.1;
646            stack.add_rule(Rule::repeat(
647                element,
648                separator,
649                AtLeast::One,
650                peg_span + other_span,
651            ));
652        }
653        '~' => {
654            let Rule(kind, span) = stack.take_rule(peg_span)?;
655            let RuleKind::Repeat(mut repeat) = kind else {
656                return Err(ErrorKind::ExpectedFound(
657                    "repetition rule".to_owned(),
658                    "other rule".to_owned(),
659                )
660                .with(peg_span));
661            };
662            repeat.allow_trailing = true;
663            stack.add_rule_kind(RuleKind::Repeat(repeat), peg_span + span);
664        }
665        '?' => {
666            let element = stack.take_rule(peg_span)?;
667            let element_span = element.1;
668            stack.add_rule(Rule::optional(element, peg_span + element_span));
669        }
670
671        '.' => stack.add_rule(Rule::sequence(vec![], peg_span)),
672        '<' => stack.0.push(Peg::sequence_start(peg_span)),
673        '(' => stack
674            .0
675            .push(Peg::group_start(GroupDelimiter::Parenthesis, peg_span)),
676        '[' => stack
677            .0
678            .push(Peg::group_start(GroupDelimiter::Bracket, peg_span)),
679        '{' => stack
680            .0
681            .push(Peg::group_start(GroupDelimiter::Brace, peg_span)),
682        '>' => {
683            let mut sequence = vec![];
684            let mut total_span = peg_span;
685            loop {
686                let peg = stack.0.pop().ok_or(
687                    ErrorKind::StackEmpty("missing start control".to_owned()).with(peg_span),
688                )?;
689                total_span += peg.span();
690                match peg {
691                    Peg::Rule(rule) => {
692                        total_span += rule.1;
693                        sequence.push(rule);
694                    }
695                    Peg::Control(Control(ControlKind::GroupStart(delimiter), span)) => {
696                        return Err(ErrorKind::ExpectedFound(
697                            "sequence start control (`<`)".into(),
698                            format!("group start control ({})", delimiter.opener()),
699                        )
700                        .with(span));
701                    }
702                    Peg::Control(Control(ControlKind::SequenceStart, span)) => {
703                        sequence.reverse();
704                        stack.add_rule(Rule::sequence(sequence, total_span + span));
705                        break;
706                    }
707                }
708            }
709        }
710        ch @ (')' | ']' | '}') => {
711            let closer = match ch {
712                ')' => GroupDelimiter::Parenthesis,
713                ']' => GroupDelimiter::Bracket,
714                '}' => GroupDelimiter::Brace,
715                _ => unreachable!(),
716            };
717            let mut sequence = vec![];
718            let mut inner_span = Span::default();
719            loop {
720                let peg = stack.0.pop().ok_or(
721                    ErrorKind::StackEmpty("missing start control".to_owned()).with(peg_span),
722                )?;
723                peg_span += peg.span();
724                match peg {
725                    Peg::Rule(rule) => {
726                        inner_span += rule.1;
727                        sequence.push(rule);
728                    }
729                    Peg::Control(Control(ControlKind::GroupStart(opener), span)) => {
730                        peg_span += span + inner_span;
731                        if opener == closer {
732                            sequence.reverse();
733                            stack.add_rule(Rule::group(
734                                opener,
735                                if sequence.len() == 1 {
736                                    sequence.pop().unwrap()
737                                } else {
738                                    Rule::sequence(sequence, inner_span)
739                                },
740                                peg_span,
741                            ));
742                            break;
743                        } else {
744                            return Err(ErrorKind::InvalidCloser {
745                                expected: opener.into(),
746                                got: closer.into(),
747                            }
748                            .with(span));
749                        }
750                    }
751                    Peg::Control(Control(ControlKind::SequenceStart, span)) => {
752                        return Err(ErrorKind::InvalidCloser {
753                            expected: AnyDelimiter::AngleBrackets,
754                            got: closer.into(),
755                        }
756                        .with(span));
757                    }
758                }
759            }
760        }
761
762        '|' => {
763            let after = parse_single_grammar(input)?;
764            let first = stack.raw_pop_rule(peg_span)?;
765            let rule = match first {
766                Rule(RuleKind::Choice(mut choices), span) => {
767                    choices.push(after);
768                    Rule::choice(choices, peg_span + span)
769                }
770                other => {
771                    let mut first_variant_span = other.1;
772                    let mut first_variant_sequence = vec![other];
773                    while let Some(peg) = stack.0.pop() {
774                        match peg {
775                            Peg::Control(control) => {
776                                stack.0.push(Peg::Control(control));
777                                peg_span += control.1;
778                                break;
779                            }
780                            Peg::Rule(rule) => {
781                                first_variant_span += rule.1;
782                                first_variant_sequence.push(rule);
783                            }
784                        }
785                    }
786                    first_variant_sequence.reverse();
787                    let first_variant_rule = if first_variant_sequence.len() == 1 {
788                        first_variant_sequence.pop().unwrap()
789                    } else {
790                        Rule::sequence(first_variant_sequence, first_variant_span)
791                    };
792                    let after_span = after.1;
793                    Rule::choice(
794                        vec![first_variant_rule, after],
795                        first_variant_span + peg_span + after_span,
796                    )
797                }
798            };
799            stack.add_rule(rule);
800        }
801
802        '@' => {
803            let Spanned(span, builtin) = expect_identifier(input)?.ok_or(input.eof())?;
804            stack.add_rule(Rule::builtin(builtin, span));
805        }
806
807        '&' => match input.next().ok_or(input.eof())? {
808            Spanned(span, '{') => {
809                peg_span += span;
810                let Spanned(span, fields) = parse_fields(input, '}')?;
811                peg_span += span;
812                stack.add_rule(Rule::record(fields, peg_span));
813            }
814            Spanned(span, '[') => {
815                peg_span += span;
816                let Spanned(span, fields) = parse_fields(input, ']')?;
817                let fields = fields
818                    .into_iter()
819                    // TODO ts ugly ah
820                    .map(|(name, rule)| {
821                        Ok((
822                            Spanned(
823                                name.0,
824                                name.1.ok_or_else(|| {
825                                    ErrorKind::ExpectedFound(
826                                        "name".to_owned(),
827                                        "`!` (named choices cannot have anonymous fields)"
828                                            .to_owned(),
829                                    )
830                                    .with(name.0)
831                                })?,
832                            ),
833                            rule,
834                        ))
835                    })
836                    .collect::<Result<Vec<_>>>()?;
837                peg_span += span;
838                stack.add_rule(Rule::named_choice(fields, peg_span));
839            }
840            other => {
841                return Err(ErrorKind::ExpectedFound(
842                    "one of `{` or `[`".to_owned(),
843                    format!("`{}`", other.1),
844                )
845                .with(other.0));
846            }
847        },
848
849        ';' => return Ok(RuleStatus::End),
850
851        ch @ pat_ident_start!() => {
852            let mut rule_name = String::from(ch);
853            consume_identifier_rest(input, &mut peg_span, &mut rule_name);
854            stack.add_rule(Rule::other(rule_name, peg_span));
855        }
856
857        other => {
858            return Err(ErrorKind::ExpectedFound(
859                "rule".to_owned(),
860                format!("character `{other}`"),
861            )
862            .with(peg_span));
863        }
864    }
865    Ok(RuleStatus::Continue)
866}
867
868/// Parses a full grammar;
869/// creates a new stack and calls `next_rule` repeatedly until no control grammars are on the stack and the stack is not empty.
870pub fn parse_single_grammar(input: &mut parse_input!()) -> Result<Rule> {
871    let mut stack = PegStack(vec![]);
872    while stack.0.is_empty()
873        || stack
874            .0
875            .iter()
876            .any(|peg: &Peg| matches!(&peg, Peg::Control(_)))
877    {
878        skip_ws(input);
879        if let RuleStatus::End = next_rule(input, &mut stack)? {
880            return Err(input.eof());
881        }
882    }
883    Ok(stack.0.pop().unwrap().try_as_rule()?)
884}
885
886/// A `#keywords` preamble.
887#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
888pub struct Keywords {
889    pub soft: Vec<Spanned<String>>,
890    pub hard: Vec<Spanned<String>>,
891}
892impl Display for Keywords {
893    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
894        if let [most @ .., last] = &self.soft[..] {
895            write!(f, "#keywords soft: ")?;
896            for Spanned(_, word) in most {
897                write!(f, "{word} ")?;
898            }
899            writeln!(f, "{};", last.1)?;
900        }
901        if let [most @ .., last] = &self.hard[..] {
902            write!(f, "#keywords hard: ")?;
903            for Spanned(_, word) in most {
904                write!(f, "{word} ")?;
905            }
906            writeln!(f, "{};", last.1)?;
907        }
908        Ok(())
909    }
910}
911
912/// The set of preambles.
913#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
914pub struct Preambles {
915    pub keywords: Keywords,
916}
917impl Display for Preambles {
918    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
919        writeln!(f, "{}", self.keywords)
920    }
921}
922
923/// A TECTA PEG module.
924#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
925pub struct TectaPegModule {
926    pub preambles: Preambles,
927    pub rules: BTreeMap<String, Vec<Rule>>,
928}
929impl Display for TectaPegModule {
930    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
931        write!(f, "{}", self.preambles)?;
932        for (rule_name, rules) in &self.rules {
933            write!(f, "{rule_name} =")?;
934            for rule in rules {
935                write!(f, " {rule}")?;
936            }
937            writeln!(f, ";")?;
938        }
939        Ok(())
940    }
941}
942
943/// Parses a list of identifiers.
944pub fn parse_basic_identifier_list(input: &mut parse_input!()) -> Result<Vec<Spanned<String>>> {
945    let mut identifiers = vec![];
946    loop {
947        skip_ws(input);
948        if let Some(Spanned(_, ';')) = input.clone().next() {
949            return Ok(identifiers);
950        }
951        identifiers.push(identifier_or_eof(input)?);
952    }
953}
954
955/// Parses a TECTA PEG module; a set of preambles and rule definitions.
956pub fn parse_module_inner(input: &mut parse_input!()) -> Result<TectaPegModule> {
957    let mut module = TectaPegModule::default();
958    loop {
959        skip_ws(input);
960        // TODO: split preamble into function
961        if let Some(Spanned(_, '#')) = input.clone().next() {
962            input.next();
963            let Spanned(span, name) = identifier_or_eof(input)?;
964            match &name[..] {
965                "keywords" => {
966                    skip_ws(input);
967                    let Spanned(span, name) = identifier_or_eof(input)?;
968                    let is_hard = match &name[..] {
969                        "hard" => true,
970                        "soft" => false,
971                        other => {
972                            return Err(ErrorKind::ExpectedFound(
973                                "keyword hardness".into(),
974                                format!("`{other}`"),
975                            )
976                            .with(span));
977                        }
978                    };
979
980                    let colon_span = expect_exactly(input, ":")?;
981                    let specified_keywords = parse_basic_identifier_list(input)?;
982
983                    if specified_keywords.is_empty() {
984                        return Err(ErrorKind::ExpectedFound(
985                            "non-empty keyword list".into(),
986                            "empty list".into(),
987                        )
988                        .with(colon_span));
989                    }
990
991                    let target_keyword_set = if is_hard {
992                        &mut module.preambles.keywords.hard
993                    } else {
994                        &mut module.preambles.keywords.soft
995                    };
996                    if !target_keyword_set.is_empty() {
997                        return Err(ErrorKind::ExistingPreamble(format!(
998                            "keywords {}",
999                            if is_hard { "hard" } else { "soft" }
1000                        ))
1001                        .with(colon_span));
1002                    }
1003
1004                    *target_keyword_set = specified_keywords;
1005                }
1006                other => {
1007                    return Err(
1008                        ErrorKind::ExpectedFound("preamble".into(), format!("`{other}`"))
1009                            .with(span),
1010                    );
1011                }
1012            }
1013            expect_exactly(input, ";")?;
1014        } else if let Some(Spanned(_, rule_name)) = expect_identifier(input)? {
1015            skip_ws(input);
1016            let _eq_span = expect_exactly(input, "=")?;
1017            skip_ws(input);
1018
1019            let mut peg_stack = PegStack(vec![]);
1020            while let RuleStatus::Continue = next_rule(input, &mut peg_stack)? {
1021                skip_ws(input);
1022            }
1023            skip_ws(input);
1024
1025            let sequence = peg_stack
1026                .0
1027                .into_iter()
1028                .map(Peg::try_as_rule)
1029                .collect::<Result<Vec<_>>>()?;
1030            module.rules.insert(rule_name, sequence);
1031        } else {
1032            break;
1033        }
1034    }
1035    Ok(module)
1036}
1037
1038/// Parses a TECTA PEG module from a string. See [`parse_module`].
1039pub fn parse_module(str: &str) -> Result<TectaPegModule> {
1040    let end_span = match str.lines().enumerate().last() {
1041        Some((index, line)) => Span {
1042            start_line: index + 1,
1043            end_line: index + 1,
1044            start_column: line.len(),
1045            end_column: line.len(),
1046        },
1047        None => Span::default(),
1048    };
1049    parse_module_inner(&mut ParseInput {
1050        chars: SpanningChars::new(str.chars()),
1051        end_span,
1052    })
1053}