Skip to main content

tecta_peg/
lib.rs

1use std::{
2    collections::BTreeMap,
3    fmt::{Debug, Display},
4};
5
6use tecta_lex::{
7    Delimiter as GroupDelimiter, Span, Spanned, SpanningChars, pat_ident_body, pat_ident_start,
8    pat_punct,
9};
10
11/// A delimiter of a group in a token tree, or a sequence.
12#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
13pub enum AnyDelimiter {
14    /// `()`
15    Parenthesis,
16    /// `[]`
17    Bracket,
18    /// `{}`
19    Brace,
20    /// `<>`,
21    AngleBrackets,
22}
23impl AnyDelimiter {
24    pub fn to_group(&self) -> Option<GroupDelimiter> {
25        match self {
26            Self::Parenthesis => Some(GroupDelimiter::Parenthesis),
27            Self::Bracket => Some(GroupDelimiter::Bracket),
28            Self::Brace => Some(GroupDelimiter::Brace),
29            Self::AngleBrackets => None,
30        }
31    }
32    pub fn opener(&self) -> char {
33        match self {
34            Self::Parenthesis => '(',
35            Self::Bracket => '[',
36            Self::Brace => '{',
37            Self::AngleBrackets => '<',
38        }
39    }
40    pub fn closer(&self) -> char {
41        match self {
42            Self::Parenthesis => ')',
43            Self::Bracket => ']',
44            Self::Brace => '}',
45            Self::AngleBrackets => '>',
46        }
47    }
48}
49impl From<GroupDelimiter> for AnyDelimiter {
50    fn from(value: GroupDelimiter) -> Self {
51        match value {
52            GroupDelimiter::Parenthesis => Self::Parenthesis,
53            GroupDelimiter::Bracket => Self::Bracket,
54            GroupDelimiter::Brace => Self::Brace,
55        }
56    }
57}
58
59/// A transient control rule; cleared by other control characters.
60#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
61pub struct Control(pub ControlKind, pub Span);
62
63/// A specific variant of control rule.
64#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
65pub enum ControlKind {
66    /// Start of a sequence rule, beginning with `<` and ending with `>`.
67    SequenceStart,
68    /// Start of a group rule, beginning with one of `(`, `[`, or `{`, and ending with, respectively, `)`, `]`, or `}`.
69    GroupStart(GroupDelimiter),
70}
71
/// The minimum number of repetitions a repeat rule requires.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum AtLeast {
    /// Zero or more repetitions; written as `*`.
    Zero,
    /// One or more repetitions; written as `+`.
    One,
}
impl Display for AtLeast {
    /// Writes the operator character that denotes this repetition count.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let operator = match self {
            Self::Zero => "*",
            Self::One => "+",
        };
        f.write_str(operator)
    }
}
88
89/// Repeats a rule a number of times, the repetition choice being decided by the operator used:
90/// - `*` repeats zero or more times
91/// - `+` repeats one or more times
92///
93/// The first operand is the element and the second is the separator.
94/// For example, `"x" ',' *` matches multiple instances of the keyword `x`, separated by commas.
95/// Trailing is enabled with the `~` modifier.
96#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
97pub struct RepeatRule {
98    pub element: Box<Rule>,
99    pub separator: Box<Rule>,
100    pub at_least: AtLeast,
101    pub allow_trailing: bool,
102}
103
104/// A grammar rule.
105#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
106pub struct Rule(pub RuleKind, pub Span);
107impl Rule {
108    pub fn sequence(rules: Vec<Rule>, span: Span) -> Self {
109        Self(RuleKind::Sequence(rules), span)
110    }
111    pub fn record(rules: Vec<(Spanned<Option<String>>, Rule)>, span: Span) -> Self {
112        Self(RuleKind::Record(rules), span)
113    }
114
115    pub fn choice(rules: Vec<Rule>, span: Span) -> Self {
116        Self(RuleKind::Choice(rules), span)
117    }
118    pub fn named_choice(rules: Vec<(Spanned<String>, Rule)>, span: Span) -> Self {
119        Self(RuleKind::NamedChoice(rules), span)
120    }
121
122    pub fn group(delimiter: GroupDelimiter, rule: Rule, span: Span) -> Self {
123        Self(RuleKind::Group(delimiter, Box::new(rule)), span)
124    }
125
126    pub fn repeat(element: Rule, separator: Rule, at_least: AtLeast, span: Span) -> Self {
127        Self(
128            RuleKind::Repeat(RepeatRule {
129                element: Box::new(element),
130                separator: Box::new(separator),
131                at_least,
132                allow_trailing: false,
133            }),
134            span,
135        )
136    }
137
138    pub fn optional(rule: Rule, span: Span) -> Self {
139        Self(RuleKind::Optional(Box::new(rule)), span)
140    }
141    pub fn boxed(rule: Rule, span: Span) -> Self {
142        Self(RuleKind::Boxed(Box::new(rule)), span)
143    }
144
145    pub fn punctuation(repr: String, span: Span) -> Self {
146        Self(RuleKind::Punctuation(repr), span)
147    }
148    pub fn keyword(repr: String, span: Span) -> Self {
149        Self(RuleKind::Keyword(repr), span)
150    }
151    pub fn other(repr: String, span: Span) -> Self {
152        Self(RuleKind::Other(repr), span)
153    }
154    pub fn builtin(repr: String, span: Span) -> Self {
155        Self(RuleKind::Builtin(repr), span)
156    }
157
158    pub fn peg(self) -> Peg {
159        Peg::Rule(self)
160    }
161}
162impl Debug for Rule {
163    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
164        write!(f, "{:?} @{}", self.0, self.1)
165    }
166}
167impl Display for Rule {
168    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
169        write!(f, "{}", self.0)
170    }
171}
172
173/// A specific grammar rule variant.
174#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
175pub enum RuleKind {
176    /// Matches a sequence of rules, one after another.
177    Sequence(Vec<Rule>),
178    /// Like a sequence, but every rule can have a name.
179    Record(Vec<(Spanned<Option<String>>, Rule)>),
180
181    /// Begins all rules at the same point, using the first one that matches.
182    Choice(Vec<Rule>),
183    /// Like a choice, but every rule can have a name.
184    NamedChoice(Vec<(Spanned<String>, Rule)>),
185
186    /// Matches inside a token group.
187    Group(GroupDelimiter, Box<Rule>),
188
189    /// A [repeating rule][`RepeatRule`], delimited with `*` or `+`.
190    Repeat(RepeatRule),
191
192    /// Makes a rule optional (allowed to fail). Delimited with `?`.
193    Optional(Box<Rule>),
194    /// Boxes a rule (permits recursion). Delimited with `^`.
195    Boxed(Box<Rule>),
196
197    /// Matches a punctuation token.
198    Punctuation(String),
199    /// Matches a keyword token.
200    Keyword(String),
201
202    /// Matches a different rule.
203    Other(String),
204    /// Matches a built-in rule (denoted with `@`).
205    Builtin(String),
206}
207impl RuleKind {
208    pub fn with(self, span: impl Into<Span>) -> Rule {
209        Rule(self, span.into())
210    }
211}
212impl Display for RuleKind {
213    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
214        match self {
215            RuleKind::Sequence(rules) => match &rules[..] {
216                [] => write!(f, "<>"),
217                [most @ .., last] => {
218                    write!(f, "<")?;
219                    for rule in most {
220                        write!(f, "{rule} ")?;
221                    }
222                    write!(f, "{last}>")
223                }
224            },
225            RuleKind::Record(fields) => {
226                write!(f, "&{{ ")?;
227                for (Spanned(_, name), rule) in fields {
228                    let name = match name {
229                        Some(name) => name,
230                        None => "!",
231                    };
232                    write!(f, "{name}: {rule}; ")?;
233                }
234                write!(f, "}}")
235            }
236
237            RuleKind::Choice(rules) => match &rules[..] {
238                [] => write!(f, "!"),
239                [most @ .., last] => {
240                    for rule in most {
241                        write!(f, "{rule} | ")?;
242                    }
243                    write!(f, "{last}")
244                }
245            },
246            RuleKind::NamedChoice(rules) => {
247                write!(f, "&[ ")?;
248                for (Spanned(_, name), rule) in rules {
249                    write!(f, "{name}: {rule}; ")?;
250                }
251                write!(f, "]")
252            }
253
254            RuleKind::Group(delimiter, inner) => {
255                write!(f, "{}{}{}", delimiter.opener(), inner, delimiter.closer())
256            }
257            RuleKind::Repeat(RepeatRule {
258                element,
259                separator,
260                at_least,
261                allow_trailing,
262            }) => write!(
263                f,
264                "{} {} {}{}",
265                element,
266                separator,
267                at_least,
268                if *allow_trailing { "~" } else { "" }
269            ),
270            RuleKind::Optional(rule) => write!(f, "{rule}?"),
271            RuleKind::Boxed(rule) => write!(f, "{rule}^"),
272            RuleKind::Punctuation(punct) => write!(f, "'{punct}'"),
273            RuleKind::Keyword(kw) => write!(f, "\"{kw}\""),
274            RuleKind::Other(name) => write!(f, "{name}"),
275            RuleKind::Builtin(builtin) => write!(f, "@{builtin}"),
276        }
277    }
278}
279
280/// An element of a PEG grammar stack.
281#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
282pub enum Peg {
283    /// A control element. Should not appear on the stack by the end of parsing.
284    Control(Control),
285    /// A grammar rule element.
286    Rule(Rule),
287}
288impl Peg {
289    pub fn sequence_start(span: Span) -> Self {
290        Self::Control(Control(ControlKind::SequenceStart, span))
291    }
292    pub fn group_start(delimiter: GroupDelimiter, span: Span) -> Self {
293        Self::Control(Control(ControlKind::GroupStart(delimiter), span))
294    }
295
296    pub fn try_as_rule(self) -> Result<Rule> {
297        match self {
298            Peg::Rule(rule) => Ok(rule),
299            Peg::Control(control) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
300        }
301    }
302    pub fn try_as_mut_rule(&mut self) -> Result<&mut Rule> {
303        match self {
304            Peg::Rule(rule) => Ok(rule),
305            Peg::Control(control) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
306        }
307    }
308    pub fn span(&self) -> Span {
309        let (Peg::Control(Control(_, span)) | Peg::Rule(Rule(_, span))) = self;
310        *span
311    }
312}
313
314/// A PEG grammar stack.
315pub struct PegStack(pub Vec<Peg>);
316impl PegStack {
317    pub fn raw_pop_rule(&mut self, operator_span: Span) -> Result<Rule> {
318        match self.0.pop() {
319            Some(Peg::Rule(rule)) => Ok(rule),
320            Some(Peg::Control(control)) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
321            None => Err(ErrorKind::StackEmpty("expected rule".into()).with(operator_span)),
322        }
323    }
324    pub fn take_rule(&mut self, operator_span: Span) -> Result<Rule> {
325        match self.0.pop() {
326            Some(Peg::Rule(Rule(RuleKind::Choice(mut choices), span))) if !choices.is_empty() => {
327                let rule = choices
328                    .pop()
329                    .expect("internal parser error: choices was in fact empty");
330                self.0
331                    .push(Peg::Rule(Rule(RuleKind::Choice(choices), span)));
332                Ok(rule)
333            }
334            Some(Peg::Rule(other_rule)) => Ok(other_rule),
335            Some(Peg::Control(control)) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
336            None => Err(ErrorKind::StackEmpty("expected rule".into()).with(operator_span)),
337        }
338    }
339    pub fn add_rule(&mut self, rule: Rule) {
340        match self.0.pop() {
341            Some(Peg::Rule(Rule(RuleKind::Choice(mut variants), span))) => {
342                if let Some(old_last_variant) = variants.pop() {
343                    let total_span = old_last_variant.1 + span;
344                    if let RuleKind::Sequence(mut sequence) = old_last_variant.0 {
345                        sequence.push(rule);
346                        variants.push(Rule(RuleKind::Sequence(sequence), total_span));
347                    } else {
348                        variants.push(Rule::sequence(vec![old_last_variant, rule], total_span));
349                    }
350                } else {
351                    variants.push(rule);
352                }
353                self.0.push(Rule::choice(variants, span).peg());
354            }
355            Some(other) => {
356                self.0.push(other);
357                self.0.push(rule.peg());
358            }
359            None => {
360                self.0.push(rule.peg());
361            }
362        }
363    }
364    pub fn add_rule_kind(&mut self, kind: RuleKind, span: Span) {
365        self.add_rule(Rule(kind, span));
366    }
367}
368
369/// Input to a PEG parsing function.
370#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
371pub struct ParseInput<I: Iterator<Item = Spanned<char>>> {
372    chars: I,
373    end_span: Span,
374}
375impl<I: Iterator<Item = Spanned<char>>> ParseInput<I> {
376    pub fn new(chars: I, end_span: Span) -> Self {
377        Self { chars, end_span }
378    }
379    pub fn current_span(&self) -> Span
380    where
381        I: Clone,
382    {
383        match self.chars.clone().next() {
384            Some(Spanned(span, _)) => span,
385            None => self.end_span,
386        }
387    }
388    fn eof(&self) -> Error {
389        Error::eof(self.end_span)
390    }
391}
392impl<I: Iterator<Item = Spanned<char>>> Iterator for ParseInput<I> {
393    type Item = Spanned<char>;
394    fn next(&mut self) -> Option<Self::Item> {
395        self.chars.next()
396    }
397}
398
// Expands to the input type shared by the parsing functions: a `ParseInput`
// over any cloneable iterator of spanned characters.
macro_rules! parse_input {
    () => { ParseInput<impl Iterator<Item = Spanned<char>> + Clone> };
}
404
405#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
406enum ErrorKind {
407    ExpectedFound(String, String),
408    StackEmpty(String),
409    StrayControl(ControlKind),
410    EOF,
411    InvalidCloser {
412        expected: AnyDelimiter,
413        got: AnyDelimiter,
414    },
415    ExistingPreamble(String),
416}
417impl ErrorKind {
418    fn with(self, span: impl Into<Span>) -> Error {
419        Error(self, span.into())
420    }
421}
422impl Display for ErrorKind {
423    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
424        match self {
425            ErrorKind::ExpectedFound(expected, found) => {
426                write!(f, "expected {expected}, found {found}")
427            }
428            ErrorKind::StackEmpty(expected) => write!(f, "stack empty; {expected}"),
429            ErrorKind::StrayControl(control) => {
430                write!(f, "expected a rule, got a control ({control:?})")
431            }
432            ErrorKind::EOF => write!(f, "unexpected end of file"),
433            ErrorKind::InvalidCloser { expected, got } => write!(
434                f,
435                "expected {} to match {}, got {}",
436                expected.closer(),
437                expected.opener(),
438                got.closer()
439            ),
440            ErrorKind::ExistingPreamble(preamble) => {
441                write!(f, "preamble #{preamble} already exists")
442            }
443        }
444    }
445}
446impl core::error::Error for ErrorKind {}
447
448/// The error type.
449#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
450pub struct Error(ErrorKind, Span);
451impl Error {
452    pub fn span(&self) -> Span {
453        self.1
454    }
455    fn eof(end_span: Span) -> Self {
456        ErrorKind::EOF.with(end_span)
457    }
458}
459impl Display for Error {
460    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
461        write!(f, "{} (at {})", self.0, self.1)
462    }
463}
464impl core::error::Error for Error {}
465pub type Result<T> = core::result::Result<T, Error>;
466
467/// Skips over as many whitespace characters as possible.
468///
469/// Used between many parsing functions.
470pub fn skip_ws(input: &mut parse_input!()) {
471    while let Some(Spanned(_, ch)) = input.clone().next() {
472        if !ch.is_whitespace() {
473            return;
474        }
475        input.next();
476    }
477}
478
479/// Attempts to parse an identifier. If already at EOF, `Ok(None)` is returned.
480pub fn expect_identifier(input: &mut parse_input!()) -> Result<Option<Spanned<String>>> {
481    let (mut span, ch) = match input.next() {
482        Some(Spanned(span, ch @ pat_ident_start!())) => (span, ch),
483        Some(Spanned(span, other)) => {
484            return Err(ErrorKind::ExpectedFound(
485                "identifier".to_owned(),
486                format!("character `{other}`"),
487            )
488            .with(span));
489        }
490        None => return Ok(None),
491    };
492    let mut output = String::from(ch);
493    consume_identifier_rest(input, &mut span, &mut output);
494    Ok(Some(Spanned(span, output)))
495}
496
497/// Similar to `expect_identifier`, but errors on EOF.
498pub fn identifier_or_eof(input: &mut parse_input!()) -> Result<Spanned<String>> {
499    expect_identifier(input)?.ok_or(input.eof())
500}
501
502/// After parsing the first character of an identifier, this can be used to parse the rest of the characters.
503pub fn consume_identifier_rest(
504    input: &mut parse_input!(),
505    into_span: &mut Span,
506    into: &mut String,
507) {
508    while let Some(Spanned(span, ch @ pat_ident_body!())) = input.clone().next() {
509        input.next();
510        *into_span += span;
511        into.push(ch);
512    }
513}
514
515/// Expects some literal text content to appear in the character stream.
516pub fn expect_exactly(input: &mut parse_input!(), string: &str) -> Result<Span> {
517    let mut collected = String::new();
518    let mut acc_span = input.current_span();
519    for ch in string.chars() {
520        let next = input.next();
521        if let Some(Spanned(span, test_ch)) = next {
522            acc_span += span;
523            collected.push(test_ch);
524            if test_ch != ch {
525                return Err(ErrorKind::ExpectedFound(
526                    format!("`{string}`"),
527                    format!("`{collected}`"),
528                )
529                .with(span));
530            }
531        } else {
532            return Err(input.eof());
533        }
534    }
535    Ok(acc_span)
536}
537
538/// Parses a list of record or named choice fields.
539pub fn parse_fields(
540    input: &mut parse_input!(),
541    terminator: char,
542) -> Result<Spanned<Vec<(Spanned<Option<String>>, Rule)>>> {
543    let mut record_fields = vec![];
544    let mut total_span = input.current_span();
545    loop {
546        skip_ws(input);
547        if let Some(Spanned(span, ch)) = input.clone().next()
548            && ch == terminator
549        {
550            total_span += span;
551            input.next();
552            break;
553        }
554
555        let identifier = if let Some(Spanned(span, '!')) = input.clone().next() {
556            input.next();
557            Spanned(span, None)
558        } else {
559            let identifier = expect_identifier(input)?.ok_or(input.eof())?;
560            Spanned(identifier.0, Some(identifier.1))
561        };
562        total_span += identifier.0;
563
564        skip_ws(input);
565        total_span += expect_exactly(input, ":")?;
566
567        skip_ws(input);
568        let start = input.current_span();
569        let mut rule_stack = PegStack(vec![]);
570        while let RuleStatus::Continue = next_rule(input, &mut rule_stack)? {
571            skip_ws(input);
572        }
573
574        let mut rules = rule_stack
575            .0
576            .into_iter()
577            .map(|rule| rule.try_as_rule())
578            .collect::<Result<Vec<_>>>()?;
579        let rule_span = rules.iter().map(|rule| rule.1).fold(start, |a, b| a + b);
580        total_span += rule_span;
581
582        record_fields.push((
583            identifier,
584            if rules.len() == 1 {
585                rules.pop().unwrap()
586            } else {
587                Rule::sequence(rules, rule_span)
588            },
589        ));
590    }
591    Ok(Spanned(total_span, record_fields))
592}
593
/// Tells the caller of `next_rule` whether to keep parsing rules.
pub enum RuleStatus {
    /// A rule or control was processed; more may follow.
    Continue,
    /// A `;` was read, which ends the current run of rules.
    End,
}
599
600/// Parses the next rule, modifying the stack accordingly. Returns whether or not rule parsing should continue.
601pub fn next_rule(input: &mut parse_input!(), stack: &mut PegStack) -> Result<RuleStatus> {
602    let Spanned(mut peg_span, ch) = input.next().ok_or(input.eof())?;
603    match ch {
604        '"' => {
605            let mut keyword = String::new();
606            loop {
607                let Spanned(span, ch) = input.next().ok_or(input.eof())?;
608                peg_span += span;
609                if ch == '"' {
610                    break;
611                }
612                keyword.push(ch);
613            }
614            stack.add_rule(Rule::keyword(keyword, peg_span));
615        }
616
617        '\'' => {
618            let mut punct = String::new();
619            loop {
620                let Spanned(span, ch) = input.next().ok_or(input.eof())?;
621                if ch == '\'' {
622                    peg_span += span;
623                    break;
624                }
625                if !matches!(ch, pat_punct!()) {
626                    return Err(ErrorKind::ExpectedFound(
627                        "punctuation".to_owned(),
628                        format!("character `{ch}`"),
629                    )
630                    .with(span));
631                }
632                peg_span += span;
633                punct.push(ch);
634            }
635            stack.add_rule(Rule::punctuation(punct, peg_span));
636        }
637
638        '*' => {
639            let separator = stack.raw_pop_rule(peg_span)?;
640            let element = stack.take_rule(peg_span)?;
641            let other_span = element.1 + separator.1;
642            stack.add_rule(Rule::repeat(
643                element,
644                separator,
645                AtLeast::Zero,
646                peg_span + other_span,
647            ));
648        }
649        '+' => {
650            let separator = stack.raw_pop_rule(peg_span)?;
651            let element = stack.take_rule(peg_span)?;
652            let other_span = element.1 + separator.1;
653            stack.add_rule(Rule::repeat(
654                element,
655                separator,
656                AtLeast::One,
657                peg_span + other_span,
658            ));
659        }
660        '~' => {
661            let Rule(kind, span) = stack.take_rule(peg_span)?;
662            let RuleKind::Repeat(mut repeat) = kind else {
663                return Err(ErrorKind::ExpectedFound(
664                    "repetition rule".to_owned(),
665                    "other rule".to_owned(),
666                )
667                .with(peg_span));
668            };
669            repeat.allow_trailing = true;
670            stack.add_rule_kind(RuleKind::Repeat(repeat), peg_span + span);
671        }
672
673        '?' => {
674            let element = stack.take_rule(peg_span)?;
675            let element_span = element.1;
676            stack.add_rule(Rule::optional(element, peg_span + element_span));
677        }
678        '^' => {
679            let element = stack.take_rule(peg_span)?;
680            let element_span = element.1;
681            stack.add_rule(Rule::boxed(element, peg_span + element_span));
682        }
683
684        '.' => stack.add_rule(Rule::sequence(vec![], peg_span)),
685        '<' => stack.0.push(Peg::sequence_start(peg_span)),
686        '(' => stack
687            .0
688            .push(Peg::group_start(GroupDelimiter::Parenthesis, peg_span)),
689        '[' => stack
690            .0
691            .push(Peg::group_start(GroupDelimiter::Bracket, peg_span)),
692        '{' => stack
693            .0
694            .push(Peg::group_start(GroupDelimiter::Brace, peg_span)),
695        '>' => {
696            let mut sequence = vec![];
697            let mut total_span = peg_span;
698            loop {
699                let peg = stack.0.pop().ok_or(
700                    ErrorKind::StackEmpty("missing start control".to_owned()).with(peg_span),
701                )?;
702                total_span += peg.span();
703                match peg {
704                    Peg::Rule(rule) => {
705                        total_span += rule.1;
706                        sequence.push(rule);
707                    }
708                    Peg::Control(Control(ControlKind::GroupStart(delimiter), span)) => {
709                        return Err(ErrorKind::ExpectedFound(
710                            "sequence start control (`<`)".into(),
711                            format!("group start control ({})", delimiter.opener()),
712                        )
713                        .with(span));
714                    }
715                    Peg::Control(Control(ControlKind::SequenceStart, span)) => {
716                        sequence.reverse();
717                        stack.add_rule(Rule::sequence(sequence, total_span + span));
718                        break;
719                    }
720                }
721            }
722        }
723        ch @ (')' | ']' | '}') => {
724            let closer = match ch {
725                ')' => GroupDelimiter::Parenthesis,
726                ']' => GroupDelimiter::Bracket,
727                '}' => GroupDelimiter::Brace,
728                _ => unreachable!(),
729            };
730            let mut sequence = vec![];
731            let mut inner_span = Span::default();
732            loop {
733                let peg = stack.0.pop().ok_or(
734                    ErrorKind::StackEmpty("missing start control".to_owned()).with(peg_span),
735                )?;
736                peg_span += peg.span();
737                match peg {
738                    Peg::Rule(rule) => {
739                        inner_span += rule.1;
740                        sequence.push(rule);
741                    }
742                    Peg::Control(Control(ControlKind::GroupStart(opener), span)) => {
743                        peg_span += span + inner_span;
744                        if opener == closer {
745                            sequence.reverse();
746                            stack.add_rule(Rule::group(
747                                opener,
748                                if sequence.len() == 1 {
749                                    sequence.pop().unwrap()
750                                } else {
751                                    Rule::sequence(sequence, inner_span)
752                                },
753                                peg_span,
754                            ));
755                            break;
756                        } else {
757                            return Err(ErrorKind::InvalidCloser {
758                                expected: opener.into(),
759                                got: closer.into(),
760                            }
761                            .with(span));
762                        }
763                    }
764                    Peg::Control(Control(ControlKind::SequenceStart, span)) => {
765                        return Err(ErrorKind::InvalidCloser {
766                            expected: AnyDelimiter::AngleBrackets,
767                            got: closer.into(),
768                        }
769                        .with(span));
770                    }
771                }
772            }
773        }
774
775        '|' => {
776            let after = parse_single_grammar(input)?;
777            let first = stack.raw_pop_rule(peg_span)?;
778            let rule = match first {
779                Rule(RuleKind::Choice(mut choices), span) => {
780                    choices.push(after);
781                    Rule::choice(choices, peg_span + span)
782                }
783                other => {
784                    let mut first_variant_span = other.1;
785                    let mut first_variant_sequence = vec![other];
786                    while let Some(peg) = stack.0.pop() {
787                        match peg {
788                            Peg::Control(control) => {
789                                stack.0.push(Peg::Control(control));
790                                peg_span += control.1;
791                                break;
792                            }
793                            Peg::Rule(rule) => {
794                                first_variant_span += rule.1;
795                                first_variant_sequence.push(rule);
796                            }
797                        }
798                    }
799                    first_variant_sequence.reverse();
800                    let first_variant_rule = if first_variant_sequence.len() == 1 {
801                        first_variant_sequence.pop().unwrap()
802                    } else {
803                        Rule::sequence(first_variant_sequence, first_variant_span)
804                    };
805                    let after_span = after.1;
806                    Rule::choice(
807                        vec![first_variant_rule, after],
808                        first_variant_span + peg_span + after_span,
809                    )
810                }
811            };
812            stack.add_rule(rule);
813        }
814
815        '@' => {
816            let Spanned(span, builtin) = expect_identifier(input)?.ok_or(input.eof())?;
817            stack.add_rule(Rule::builtin(builtin, span));
818        }
819
820        '&' => match input.next().ok_or(input.eof())? {
821            Spanned(span, '{') => {
822                peg_span += span;
823                let Spanned(span, fields) = parse_fields(input, '}')?;
824                peg_span += span;
825                stack.add_rule(Rule::record(fields, peg_span));
826            }
827            Spanned(span, '[') => {
828                peg_span += span;
829                let Spanned(span, fields) = parse_fields(input, ']')?;
830                let fields = fields
831                    .into_iter()
832                    // TODO ts ugly ah
833                    .map(|(name, rule)| {
834                        Ok((
835                            Spanned(
836                                name.0,
837                                name.1.ok_or_else(|| {
838                                    ErrorKind::ExpectedFound(
839                                        "name".to_owned(),
840                                        "`!` (named choices cannot have anonymous fields)"
841                                            .to_owned(),
842                                    )
843                                    .with(name.0)
844                                })?,
845                            ),
846                            rule,
847                        ))
848                    })
849                    .collect::<Result<Vec<_>>>()?;
850                peg_span += span;
851                stack.add_rule(Rule::named_choice(fields, peg_span));
852            }
853            other => {
854                return Err(ErrorKind::ExpectedFound(
855                    "one of `{` or `[`".to_owned(),
856                    format!("`{}`", other.1),
857                )
858                .with(other.0));
859            }
860        },
861
862        ';' => return Ok(RuleStatus::End),
863
864        ch @ pat_ident_start!() => {
865            let mut rule_name = String::from(ch);
866            consume_identifier_rest(input, &mut peg_span, &mut rule_name);
867            stack.add_rule(Rule::other(rule_name, peg_span));
868        }
869
870        other => {
871            return Err(ErrorKind::ExpectedFound(
872                "rule".to_owned(),
873                format!("character `{other}`"),
874            )
875            .with(peg_span));
876        }
877    }
878    Ok(RuleStatus::Continue)
879}
880
881/// Parses a full grammar;
882/// creates a new stack and calls `next_rule` repeatedly until no control grammars are on the stack and the stack is not empty.
883pub fn parse_single_grammar(input: &mut parse_input!()) -> Result<Rule> {
884    let mut stack = PegStack(vec![]);
885    while stack.0.is_empty()
886        || stack
887            .0
888            .iter()
889            .any(|peg: &Peg| matches!(&peg, Peg::Control(_)))
890    {
891        skip_ws(input);
892        if let RuleStatus::End = next_rule(input, &mut stack)? {
893            return Err(input.eof());
894        }
895    }
896    Ok(stack.0.pop().unwrap().try_as_rule()?)
897}
898
899/// A `#keywords` preamble.
900#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
901pub struct Keywords {
902    pub soft: Vec<Spanned<String>>,
903    pub hard: Vec<Spanned<String>>,
904}
905impl Display for Keywords {
906    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
907        if let [most @ .., last] = &self.soft[..] {
908            write!(f, "#keywords soft: ")?;
909            for Spanned(_, word) in most {
910                write!(f, "{word} ")?;
911            }
912            writeln!(f, "{};", last.1)?;
913        }
914        if let [most @ .., last] = &self.hard[..] {
915            write!(f, "#keywords hard: ")?;
916            for Spanned(_, word) in most {
917                write!(f, "{word} ")?;
918            }
919            writeln!(f, "{};", last.1)?;
920        }
921        Ok(())
922    }
923}
924
925/// The set of preambles.
926#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
927pub struct Preambles {
928    pub keywords: Keywords,
929}
930impl Display for Preambles {
931    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
932        writeln!(f, "{}", self.keywords)
933    }
934}
935
936/// A TECTA PEG module.
937#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
938pub struct TectaPegModule {
939    pub preambles: Preambles,
940    pub rules: BTreeMap<String, Vec<Rule>>,
941}
942impl Display for TectaPegModule {
943    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
944        write!(f, "{}", self.preambles)?;
945        for (rule_name, rules) in &self.rules {
946            write!(f, "{rule_name} =")?;
947            for rule in rules {
948                write!(f, " {rule}")?;
949            }
950            writeln!(f, ";")?;
951        }
952        Ok(())
953    }
954}
955
956/// Parses a list of identifiers.
957pub fn parse_basic_identifier_list(input: &mut parse_input!()) -> Result<Vec<Spanned<String>>> {
958    let mut identifiers = vec![];
959    loop {
960        skip_ws(input);
961        if let Some(Spanned(_, ';')) = input.clone().next() {
962            return Ok(identifiers);
963        }
964        identifiers.push(identifier_or_eof(input)?);
965    }
966}
967
968/// Parses a TECTA PEG module; a set of preambles and rule definitions.
969pub fn parse_module_inner(input: &mut parse_input!()) -> Result<TectaPegModule> {
970    let mut module = TectaPegModule::default();
971    loop {
972        skip_ws(input);
973        // TODO: split preamble into function
974        if let Some(Spanned(_, '#')) = input.clone().next() {
975            input.next();
976            let Spanned(span, name) = identifier_or_eof(input)?;
977            match &name[..] {
978                "keywords" => {
979                    skip_ws(input);
980                    let Spanned(span, name) = identifier_or_eof(input)?;
981                    let is_hard = match &name[..] {
982                        "hard" => true,
983                        "soft" => false,
984                        other => {
985                            return Err(ErrorKind::ExpectedFound(
986                                "keyword hardness".into(),
987                                format!("`{other}`"),
988                            )
989                            .with(span));
990                        }
991                    };
992
993                    let colon_span = expect_exactly(input, ":")?;
994                    let specified_keywords = parse_basic_identifier_list(input)?;
995
996                    if specified_keywords.is_empty() {
997                        return Err(ErrorKind::ExpectedFound(
998                            "non-empty keyword list".into(),
999                            "empty list".into(),
1000                        )
1001                        .with(colon_span));
1002                    }
1003
1004                    let target_keyword_set = if is_hard {
1005                        &mut module.preambles.keywords.hard
1006                    } else {
1007                        &mut module.preambles.keywords.soft
1008                    };
1009                    if !target_keyword_set.is_empty() {
1010                        return Err(ErrorKind::ExistingPreamble(format!(
1011                            "keywords {}",
1012                            if is_hard { "hard" } else { "soft" }
1013                        ))
1014                        .with(colon_span));
1015                    }
1016
1017                    *target_keyword_set = specified_keywords;
1018                }
1019                other => {
1020                    return Err(
1021                        ErrorKind::ExpectedFound("preamble".into(), format!("`{other}`"))
1022                            .with(span),
1023                    );
1024                }
1025            }
1026            expect_exactly(input, ";")?;
1027        } else if let Some(Spanned(_, rule_name)) = expect_identifier(input)? {
1028            skip_ws(input);
1029            let _eq_span = expect_exactly(input, "=")?;
1030            skip_ws(input);
1031
1032            let mut peg_stack = PegStack(vec![]);
1033            while let RuleStatus::Continue = next_rule(input, &mut peg_stack)? {
1034                skip_ws(input);
1035            }
1036            skip_ws(input);
1037
1038            let sequence = peg_stack
1039                .0
1040                .into_iter()
1041                .map(Peg::try_as_rule)
1042                .collect::<Result<Vec<_>>>()?;
1043            module.rules.insert(rule_name, sequence);
1044        } else {
1045            break;
1046        }
1047    }
1048    Ok(module)
1049}
1050
1051/// Parses a TECTA PEG module from a string. See [`parse_module`].
1052pub fn parse_module(str: &str) -> Result<TectaPegModule> {
1053    let end_span = match str.lines().enumerate().last() {
1054        Some((index, line)) => Span {
1055            start_line: index + 1,
1056            end_line: index + 1,
1057            start_column: line.len(),
1058            end_column: line.len(),
1059        },
1060        None => Span::default(),
1061    };
1062    parse_module_inner(&mut ParseInput {
1063        chars: SpanningChars::new(str.chars()),
1064        end_span,
1065    })
1066}