Skip to main content

tecta_peg/
lib.rs

1use std::{
2    collections::BTreeMap,
3    fmt::{Debug, Display},
4};
5
6use tecta_lex::{Delimiter as GroupDelimiter, Span, pat_ident_body, pat_ident_start, pat_punct};
7
/// A delimiter of a group in a token tree, or a sequence.
///
/// The first three variants have a [`GroupDelimiter`] counterpart;
/// `AngleBrackets` does not (see [`AnyDelimiter::to_group`]).
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum AnyDelimiter {
    /// `()`
    Parenthesis,
    /// `[]`
    Bracket,
    /// `{}`
    Brace,
    /// `<>`
    AngleBrackets,
}
20impl AnyDelimiter {
21    pub fn to_group(&self) -> Option<GroupDelimiter> {
22        match self {
23            Self::Parenthesis => Some(GroupDelimiter::Parenthesis),
24            Self::Bracket => Some(GroupDelimiter::Bracket),
25            Self::Brace => Some(GroupDelimiter::Brace),
26            Self::AngleBrackets => None,
27        }
28    }
29    pub fn opener(&self) -> char {
30        match self {
31            Self::Parenthesis => '(',
32            Self::Bracket => '[',
33            Self::Brace => '{',
34            Self::AngleBrackets => '<',
35        }
36    }
37    pub fn closer(&self) -> char {
38        match self {
39            Self::Parenthesis => ')',
40            Self::Bracket => ']',
41            Self::Brace => '}',
42            Self::AngleBrackets => '>',
43        }
44    }
45}
46impl From<GroupDelimiter> for AnyDelimiter {
47    fn from(value: GroupDelimiter) -> Self {
48        match value {
49            GroupDelimiter::Parenthesis => Self::Parenthesis,
50            GroupDelimiter::Bracket => Self::Bracket,
51            GroupDelimiter::Brace => Self::Brace,
52        }
53    }
54}
55
/// A transient control rule; cleared by other control characters.
///
/// The first field is the kind of control, the second is the span it was
/// read at.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Control(pub ControlKind, pub Span);
59
/// A specific variant of control rule.
///
/// Each variant marks an opening delimiter that is still waiting for its
/// closer.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum ControlKind {
    /// Start of a sequence rule, beginning with `<` and ending with `>`.
    SequenceStart,
    /// Start of a group rule, beginning with one of `(`, `[`, or `{`, and ending with, respectively, `)`, `]`, or `}`.
    GroupStart(GroupDelimiter),
}
68
/// The minimum number of repetitions a repeat rule accepts.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum AtLeast {
    /// Matches a rule zero or more times. Delimited by `*`.
    Zero,
    /// Matches a rule one or more times. Delimited by `+`.
    One,
}
impl Display for AtLeast {
    /// Writes the operator character that denotes this repetition count.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let operator = match self {
            AtLeast::Zero => "*",
            AtLeast::One => "+",
        };
        f.write_str(operator)
    }
}
85
/// Repeats a rule a number of times, the repetition choice being decided by the operator used:
/// - `*` repeats zero or more times
/// - `+` repeats one or more times
///
/// The first operand is the element and the second is the separator.
/// For example, `"x" ',' *` matches multiple instances of the keyword `x`, separated by commas.
/// Trailing is enabled with the `~` modifier.
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct RepeatRule {
    /// The rule being repeated.
    pub element: Box<Rule>,
    /// The rule that separates consecutive elements.
    pub separator: Box<Rule>,
    /// Minimum number of repetitions (`*` or `+`).
    pub at_least: AtLeast,
    /// Set by the `~` modifier; `false` when built through [`Rule::repeat`].
    pub allow_trailing: bool,
}
100
/// A grammar rule.
///
/// The first field is the rule variant, the second is the span it covers.
/// `Debug` is implemented by hand rather than derived.
#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Rule(pub RuleKind, pub Span);
104impl Rule {
105    pub fn sequence(rules: Vec<Rule>, span: Span) -> Self {
106        Self(RuleKind::Sequence(rules), span)
107    }
108    pub fn choice(rules: Vec<Rule>, span: Span) -> Self {
109        Self(RuleKind::Choice(rules), span)
110    }
111    pub fn group(delimiter: GroupDelimiter, rule: Rule, span: Span) -> Self {
112        Self(RuleKind::Group(delimiter, Box::new(rule)), span)
113    }
114
115    pub fn repeat(element: Rule, separator: Rule, at_least: AtLeast, span: Span) -> Self {
116        Self(
117            RuleKind::Repeat(RepeatRule {
118                element: Box::new(element),
119                separator: Box::new(separator),
120                at_least,
121                allow_trailing: false,
122            }),
123            span,
124        )
125    }
126    pub fn optional(rule: Rule, span: Span) -> Self {
127        Self(RuleKind::Optional(Box::new(rule)), span)
128    }
129
130    pub fn punctuation(repr: String, span: Span) -> Self {
131        Self(RuleKind::Punctuation(repr), span)
132    }
133    pub fn keyword(repr: String, span: Span) -> Self {
134        Self(RuleKind::Keyword(repr), span)
135    }
136    pub fn other(repr: String, span: Span) -> Self {
137        Self(RuleKind::Other(repr), span)
138    }
139    pub fn builtin(repr: String, span: Span) -> Self {
140        Self(RuleKind::Builtin(repr), span)
141    }
142
143    pub fn peg(self) -> Peg {
144        Peg::Rule(self)
145    }
146}
147impl Debug for Rule {
148    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
149        write!(f, "{:?} @{}", self.0, self.1)
150    }
151}
152impl Display for Rule {
153    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
154        write!(f, "{}", self.0)
155    }
156}
157
/// A specific grammar rule variant.
///
/// Pair a kind with a span through [`RuleKind::with`] to obtain a [`Rule`].
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum RuleKind {
    /// Matches a sequence of rules, one after another.
    Sequence(Vec<Rule>),
    /// Begins all rules at the same point, using the first one that matches.
    Choice(Vec<Rule>),
    /// Matches inside a token group.
    Group(GroupDelimiter, Box<Rule>),

    /// A [repeating rule][`RepeatRule`], delimited with `*` or `+`.
    Repeat(RepeatRule),

    /// Makes a rule optional (allowed to fail). Delimited with `?`.
    Optional(Box<Rule>),

    /// Matches a punctuation token.
    Punctuation(String),
    /// Matches a keyword token.
    Keyword(String),

    /// Matches a different rule.
    Other(String),
    /// Matches a built-in rule (denoted with `@`).
    Builtin(String),
}
184impl RuleKind {
185    pub fn with(self, span: impl Into<Span>) -> Rule {
186        Rule(self, span.into())
187    }
188}
189impl Display for RuleKind {
190    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
191        match self {
192            RuleKind::Sequence(rules) => match &rules[..] {
193                [] => write!(f, "<>"),
194                [most @ .., last] => {
195                    write!(f, "<")?;
196                    for rule in most {
197                        write!(f, "{rule} ")?;
198                    }
199                    write!(f, "{last}>")
200                }
201            },
202            RuleKind::Choice(rules) => match &rules[..] {
203                [] => write!(f, "!"),
204                [most @ .., last] => {
205                    for rule in most {
206                        write!(f, "{rule} | ")?;
207                    }
208                    write!(f, "{last}")
209                }
210            },
211            RuleKind::Group(delimiter, inner) => {
212                write!(f, "{}{}{}", delimiter.opener(), inner, delimiter.closer())
213            }
214            RuleKind::Repeat(RepeatRule {
215                element,
216                separator,
217                at_least,
218                allow_trailing,
219            }) => write!(
220                f,
221                "{} {} {}{}",
222                element,
223                separator,
224                at_least,
225                if *allow_trailing { "~" } else { "" }
226            ),
227            RuleKind::Optional(rule) => write!(f, "{rule}?"),
228            RuleKind::Punctuation(punct) => write!(f, "'{punct}'"),
229            RuleKind::Keyword(kw) => write!(f, "\"{kw}\""),
230            RuleKind::Other(name) => write!(f, "{name}"),
231            RuleKind::Builtin(builtin) => write!(f, "@{builtin}"),
232        }
233    }
234}
235
/// An element of a PEG grammar stack.
///
/// Either an opening delimiter still waiting for its closer, or a finished
/// rule.
#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum Peg {
    /// A control element. Should not appear on the stack by the end of parsing.
    Control(Control),
    /// A grammar rule element.
    Rule(Rule),
}
244impl Peg {
245    pub fn sequence_start(span: Span) -> Self {
246        Self::Control(Control(ControlKind::SequenceStart, span))
247    }
248    pub fn group_start(delimiter: GroupDelimiter, span: Span) -> Self {
249        Self::Control(Control(ControlKind::GroupStart(delimiter), span))
250    }
251
252    pub fn try_as_rule(self) -> Result<Rule> {
253        match self {
254            Peg::Rule(rule) => Ok(rule),
255            Peg::Control(control) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
256        }
257    }
258    pub fn try_as_mut_rule(&mut self) -> Result<&mut Rule> {
259        match self {
260            Peg::Rule(rule) => Ok(rule),
261            Peg::Control(control) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
262        }
263    }
264    pub fn span(&self) -> Span {
265        let (Peg::Control(Control(_, span)) | Peg::Rule(Rule(_, span))) = self;
266        *span
267    }
268}
269
/// A PEG grammar stack.
///
/// Wraps the vector of [`Peg`] elements; the last element is the top of the
/// stack.
pub struct PegStack(pub Vec<Peg>);
272impl PegStack {
273    pub fn raw_pop_rule(&mut self, operator_span: Span) -> Result<Rule> {
274        match self.0.pop() {
275            Some(Peg::Rule(rule)) => Ok(rule),
276            Some(Peg::Control(control)) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
277            None => Err(ErrorKind::StackEmpty("expected rule".into()).with(operator_span)),
278        }
279    }
280    pub fn take_rule(&mut self, operator_span: Span) -> Result<Rule> {
281        match self.0.pop() {
282            Some(Peg::Rule(Rule(RuleKind::Choice(mut choices), span))) if !choices.is_empty() => {
283                let rule = choices
284                    .pop()
285                    .expect("internal parser error: choices was in fact empty");
286                self.0
287                    .push(Peg::Rule(Rule(RuleKind::Choice(choices), span)));
288                Ok(rule)
289            }
290            Some(Peg::Rule(other_rule)) => Ok(other_rule),
291            Some(Peg::Control(control)) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
292            None => Err(ErrorKind::StackEmpty("expected rule".into()).with(operator_span)),
293        }
294    }
295    pub fn add_rule(&mut self, rule: Rule) {
296        match self.0.pop() {
297            Some(Peg::Rule(Rule(RuleKind::Choice(mut variants), span))) => {
298                if let Some(old_last_variant) = variants.pop() {
299                    let total_span = old_last_variant.1 + span;
300                    if let RuleKind::Sequence(mut sequence) = old_last_variant.0 {
301                        sequence.push(rule);
302                        variants.push(Rule(RuleKind::Sequence(sequence), total_span));
303                    } else {
304                        variants.push(Rule::sequence(vec![old_last_variant, rule], total_span));
305                    }
306                } else {
307                    variants.push(rule);
308                }
309                self.0.push(Rule::choice(variants, span).peg());
310            }
311            Some(other) => {
312                self.0.push(other);
313                self.0.push(rule.peg());
314            }
315            None => {
316                self.0.push(rule.peg());
317            }
318        }
319    }
320    pub fn add_rule_kind(&mut self, kind: RuleKind, span: Span) {
321        self.add_rule(Rule(kind, span));
322    }
323}
324
/// Input to a PEG parsing function.
///
/// Wraps an iterator of spanned characters together with the span to report
/// once that iterator is exhausted.
#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct ParseInput<I: Iterator<Item = (Span, char)>> {
    // Remaining characters, each paired with its span.
    chars: I,
    // Span used for end-of-file errors and as the "current" span at the end.
    end_span: Span,
}
331impl<I: Iterator<Item = (Span, char)>> ParseInput<I> {
332    pub fn new(chars: I, end_span: Span) -> Self {
333        Self { chars, end_span }
334    }
335    pub fn current_span(&self) -> Span
336    where
337        I: Clone,
338    {
339        match self.chars.clone().next() {
340            Some((span, _)) => span,
341            None => self.end_span,
342        }
343    }
344    fn eof(&self) -> Error {
345        Error::eof(self.end_span)
346    }
347}
348impl<I: Iterator<Item = (Span, char)>> Iterator for ParseInput<I> {
349    type Item = (Span, char);
350    fn next(&mut self) -> Option<Self::Item> {
351        self.chars.next()
352    }
353}
354
/// Wraps a character iterator, adding span information to each character.
#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct SpannedChars<I: Iterator<Item = char>> {
    // The wrapped character source.
    iterator: I,
    // Line of the next character to be yielded.
    current_line: usize,
    // Column of the next character to be yielded.
    current_column: usize,
}
362impl<I: Iterator<Item = char>> SpannedChars<I> {
363    pub fn new(iterator: I) -> Self {
364        SpannedChars {
365            iterator,
366            current_line: 1,
367            current_column: 1,
368        }
369    }
370}
371impl<I: Iterator<Item = char>> Iterator for SpannedChars<I> {
372    type Item = (Span, char);
373    fn next(&mut self) -> Option<Self::Item> {
374        let next = self.iterator.next()?;
375        let span = Span {
376            start_line: self.current_line,
377            end_line: self.current_line,
378            start_column: self.current_column,
379            end_column: self.current_column,
380        };
381        if next == '\n' {
382            self.current_line += 1;
383            self.current_column = 1;
384        } else {
385            self.current_column += 1;
386        }
387        Some((span, next))
388    }
389}
390
// Expands to the type used for the `input` parameter of the parsing functions:
// a `ParseInput` over any cloneable iterator of `(Span, char)` pairs.
macro_rules! parse_input {
    () => {
        ParseInput<impl Iterator<Item = (Span, char)> + Clone>
    };
}
396
/// The specific failure behind an [`Error`].
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum ErrorKind {
    /// Something other than what was expected was read: `(expected, found)`.
    ExpectedFound(String, String),
    /// The stack had nothing to pop; the payload says what was expected.
    StackEmpty(String),
    /// A control was found where a rule was expected.
    StrayControl(ControlKind),
    /// The input ended unexpectedly.
    EOF,
    /// A closing delimiter did not match the opener it was paired with.
    InvalidCloser {
        expected: AnyDelimiter,
        got: AnyDelimiter,
    },
    /// The named preamble was specified more than once.
    ExistingPreamble(String),
}
409impl ErrorKind {
410    fn with(self, span: impl Into<Span>) -> Error {
411        Error(self, span.into())
412    }
413}
414impl Display for ErrorKind {
415    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
416        match self {
417            ErrorKind::ExpectedFound(expected, found) => {
418                write!(f, "expected {expected}, found {found}")
419            }
420            ErrorKind::StackEmpty(expected) => write!(f, "stack empty; {expected}"),
421            ErrorKind::StrayControl(control) => {
422                write!(f, "expected a rule, got a control ({control:?})")
423            }
424            ErrorKind::EOF => write!(f, "unexpected end of file"),
425            ErrorKind::InvalidCloser { expected, got } => write!(
426                f,
427                "expected {} to match {}, got {}",
428                expected.closer(),
429                expected.opener(),
430                got.closer()
431            ),
432            ErrorKind::ExistingPreamble(preamble) => {
433                write!(f, "preamble #{preamble} already exists")
434            }
435        }
436    }
437}
438impl core::error::Error for ErrorKind {}
439
/// The error type.
///
/// Pairs the kind of failure with the span it is reported at.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Error(ErrorKind, Span);
443impl Error {
444    pub fn span(&self) -> Span {
445        self.1
446    }
447    fn eof(end_span: Span) -> Self {
448        ErrorKind::EOF.with(end_span)
449    }
450}
451impl Display for Error {
452    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
453        write!(f, "{} (at {})", self.0, self.1)
454    }
455}
impl core::error::Error for Error {}
/// Result alias whose error type is this crate's [`Error`].
pub type Result<T> = core::result::Result<T, Error>;
458
459/// Skips over as many whitespace characters as possible.
460///
461/// Used between many parsing functions.
462pub fn skip_ws(input: &mut parse_input!()) {
463    while let Some((_, ch)) = input.clone().next() {
464        if !ch.is_whitespace() {
465            return;
466        }
467        input.next();
468    }
469}
470
471/// Attempts to parse an identifier. If already at EOF, `Ok(None)` is returned.
472pub fn expect_identifier(input: &mut parse_input!()) -> Result<Option<(Span, String)>> {
473    let (mut span, ch) = match input.next() {
474        Some((span, ch @ pat_ident_start!())) => (span, ch),
475        Some((span, other)) => {
476            return Err(ErrorKind::ExpectedFound(
477                "identifier".to_owned(),
478                format!("character `{other}`"),
479            )
480            .with(span));
481        }
482        None => return Ok(None),
483    };
484    let mut output = String::from(ch);
485    consume_identifier_rest(input, &mut span, &mut output);
486    Ok(Some((span, output)))
487}
488
489/// Similar to `expect_identifier`, but errors on EOF.
490pub fn identifier_or_eof(input: &mut parse_input!()) -> Result<(Span, String)> {
491    expect_identifier(input)?.ok_or(input.eof())
492}
493
494/// After parsing the first character of an identifier, this can be used to parse the rest of the characters.
495pub fn consume_identifier_rest(
496    input: &mut parse_input!(),
497    into_span: &mut Span,
498    into: &mut String,
499) {
500    while let Some((span, ch @ pat_ident_body!())) = input.clone().next() {
501        input.next();
502        *into_span += span;
503        into.push(ch);
504    }
505}
506
507/// Expects some literal text content to appear in the character stream.
508pub fn expect_exactly(input: &mut parse_input!(), string: &str) -> Result<Span> {
509    let mut collected = String::new();
510    let mut acc_span = input.current_span();
511    for ch in string.chars() {
512        let next = input.next();
513        if let Some((span, test_ch)) = next {
514            acc_span += span;
515            collected.push(test_ch);
516            if test_ch != ch {
517                return Err(ErrorKind::ExpectedFound(
518                    format!("`{string}`"),
519                    format!("`{collected}`"),
520                )
521                .with(span));
522            }
523        } else {
524            return Err(input.eof());
525        }
526    }
527    Ok(acc_span)
528}
529
/// Whether or not rule parsing should continue.
pub enum RuleStatus {
    /// A rule element was processed; more may follow.
    Continue,
    /// The terminating `;` was read.
    End,
}
535
536/// Parses the next rule, modifying the stack accordingly. Returns whether or not rule parsing should continue.
537pub fn next_rule(input: &mut parse_input!(), stack: &mut PegStack) -> Result<RuleStatus> {
538    let (mut peg_span, ch) = input.next().ok_or(input.eof())?;
539    match ch {
540        '"' => {
541            let mut keyword = String::new();
542            loop {
543                let (span, ch) = input.next().ok_or(input.eof())?;
544                peg_span += span;
545                if ch == '"' {
546                    break;
547                }
548                keyword.push(ch);
549            }
550            stack.add_rule(Rule::keyword(keyword, peg_span));
551        }
552
553        '\'' => {
554            let mut punct = String::new();
555            loop {
556                let (span, ch) = input.next().ok_or(input.eof())?;
557                if ch == '\'' {
558                    peg_span += span;
559                    break;
560                }
561                if !matches!(ch, pat_punct!()) {
562                    return Err(ErrorKind::ExpectedFound(
563                        "punctuation".to_owned(),
564                        format!("character `{ch}`"),
565                    )
566                    .with(span));
567                }
568                peg_span += span;
569                punct.push(ch);
570            }
571            stack.add_rule(Rule::punctuation(punct, peg_span));
572        }
573
574        '*' => {
575            let separator = stack.raw_pop_rule(peg_span)?;
576            let element = stack.take_rule(peg_span)?;
577            let other_span = element.1 + separator.1;
578            stack.add_rule(Rule::repeat(
579                element,
580                separator,
581                AtLeast::Zero,
582                peg_span + other_span,
583            ));
584        }
585        '+' => {
586            let separator = stack.raw_pop_rule(peg_span)?;
587            let element = stack.take_rule(peg_span)?;
588            let other_span = element.1 + separator.1;
589            stack.add_rule(Rule::repeat(
590                element,
591                separator,
592                AtLeast::One,
593                peg_span + other_span,
594            ));
595        }
596        '~' => {
597            let Rule(kind, span) = stack.take_rule(peg_span)?;
598            let RuleKind::Repeat(mut repeat) = kind else {
599                return Err(ErrorKind::ExpectedFound(
600                    "repetition rule".to_owned(),
601                    "other rule".to_owned(),
602                )
603                .with(peg_span));
604            };
605            repeat.allow_trailing = true;
606            stack.add_rule_kind(RuleKind::Repeat(repeat), peg_span + span);
607        }
608        '?' => {
609            let element = stack.take_rule(peg_span)?;
610            let element_span = element.1;
611            stack.add_rule(Rule::optional(element, peg_span + element_span));
612        }
613
614        '.' => stack.add_rule(Rule::sequence(vec![], peg_span)),
615        '<' => stack.0.push(Peg::sequence_start(peg_span)),
616        '(' => stack
617            .0
618            .push(Peg::group_start(GroupDelimiter::Parenthesis, peg_span)),
619        '[' => stack
620            .0
621            .push(Peg::group_start(GroupDelimiter::Bracket, peg_span)),
622        '{' => stack
623            .0
624            .push(Peg::group_start(GroupDelimiter::Brace, peg_span)),
625        '>' => {
626            let mut sequence = vec![];
627            let mut total_span = peg_span;
628            loop {
629                let peg = stack.0.pop().ok_or(
630                    ErrorKind::StackEmpty("missing start control".to_owned()).with(peg_span),
631                )?;
632                total_span += peg.span();
633                match peg {
634                    Peg::Rule(rule) => {
635                        total_span += rule.1;
636                        sequence.push(rule);
637                    }
638                    Peg::Control(Control(ControlKind::GroupStart(delimiter), span)) => {
639                        return Err(ErrorKind::ExpectedFound(
640                            "sequence start control (`<`)".into(),
641                            format!("group start control ({})", delimiter.opener()),
642                        )
643                        .with(span));
644                    }
645                    Peg::Control(Control(ControlKind::SequenceStart, span)) => {
646                        sequence.reverse();
647                        stack.add_rule(Rule::sequence(sequence, total_span + span));
648                        break;
649                    }
650                }
651            }
652        }
653        ch @ (')' | ']' | '}') => {
654            let closer = match ch {
655                ')' => GroupDelimiter::Parenthesis,
656                ']' => GroupDelimiter::Bracket,
657                '}' => GroupDelimiter::Brace,
658                _ => unreachable!(),
659            };
660            let mut sequence = vec![];
661            let mut inner_span = Span::default();
662            loop {
663                let peg = stack.0.pop().ok_or(
664                    ErrorKind::StackEmpty("missing start control".to_owned()).with(peg_span),
665                )?;
666                peg_span += peg.span();
667                match peg {
668                    Peg::Rule(rule) => {
669                        inner_span += rule.1;
670                        sequence.push(rule);
671                    }
672                    Peg::Control(Control(ControlKind::GroupStart(opener), span)) => {
673                        peg_span += span + inner_span;
674                        if opener == closer {
675                            sequence.reverse();
676                            stack.add_rule(Rule::group(
677                                opener,
678                                if sequence.len() == 1 {
679                                    sequence.pop().unwrap()
680                                } else {
681                                    Rule::sequence(sequence, inner_span)
682                                },
683                                peg_span,
684                            ));
685                            break;
686                        } else {
687                            return Err(ErrorKind::InvalidCloser {
688                                expected: opener.into(),
689                                got: closer.into(),
690                            }
691                            .with(span));
692                        }
693                    }
694                    Peg::Control(Control(ControlKind::SequenceStart, span)) => {
695                        return Err(ErrorKind::InvalidCloser {
696                            expected: AnyDelimiter::AngleBrackets,
697                            got: closer.into(),
698                        }
699                        .with(span));
700                    }
701                }
702            }
703        }
704
705        '|' => {
706            let after = parse_single_grammar(input)?;
707            let first = stack.raw_pop_rule(peg_span)?;
708            let rule = match first {
709                Rule(RuleKind::Choice(mut choices), span) => {
710                    choices.push(after);
711                    Rule::choice(choices, peg_span + span)
712                }
713                other => {
714                    let mut first_variant_span = other.1;
715                    let mut first_variant_sequence = vec![other];
716                    while let Some(peg) = stack.0.pop() {
717                        match peg {
718                            Peg::Control(control) => {
719                                stack.0.push(Peg::Control(control));
720                                peg_span += control.1;
721                                break;
722                            }
723                            Peg::Rule(rule) => {
724                                first_variant_span += rule.1;
725                                first_variant_sequence.push(rule);
726                            }
727                        }
728                    }
729                    first_variant_sequence.reverse();
730                    let first_variant_rule = if first_variant_sequence.len() == 1 {
731                        first_variant_sequence.pop().unwrap()
732                    } else {
733                        Rule::sequence(first_variant_sequence, first_variant_span)
734                    };
735                    let after_span = after.1;
736                    Rule::choice(
737                        vec![first_variant_rule, after],
738                        first_variant_span + peg_span + after_span,
739                    )
740                }
741            };
742            stack.add_rule(rule);
743        }
744
745        '@' => {
746            let (span, builtin) = expect_identifier(input)?.ok_or(ErrorKind::EOF.with(peg_span))?;
747            stack.add_rule(Rule::builtin(builtin, span));
748        }
749        ';' => return Ok(RuleStatus::End),
750
751        ch @ pat_ident_start!() => {
752            let mut rule_name = String::from(ch);
753            consume_identifier_rest(input, &mut peg_span, &mut rule_name);
754            stack.add_rule(Rule::other(rule_name, peg_span));
755        }
756
757        other => {
758            return Err(ErrorKind::ExpectedFound(
759                "rule".to_owned(),
760                format!("character `{other}`"),
761            )
762            .with(peg_span));
763        }
764    }
765    Ok(RuleStatus::Continue)
766}
767
768/// Parses a full grammar;
769/// creates a new stack and calls `next_rule` repeatedly until no control grammars are on the stack and the stack is not empty.
770pub fn parse_single_grammar(input: &mut parse_input!()) -> Result<Rule> {
771    let mut stack = PegStack(vec![]);
772    while stack.0.is_empty()
773        || stack
774            .0
775            .iter()
776            .any(|peg: &Peg| matches!(&peg, Peg::Control(_)))
777    {
778        skip_ws(input);
779        if let RuleStatus::End = next_rule(input, &mut stack)? {
780            return Err(input.eof());
781        }
782    }
783    Ok(stack.0.pop().unwrap().try_as_rule()?)
784}
785
/// A `#keywords` preamble.
///
/// Each entry pairs a keyword with the span it was read at.
#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Keywords {
    /// Keywords listed under `#keywords soft`.
    pub soft: Vec<(Span, String)>,
    /// Keywords listed under `#keywords hard`.
    pub hard: Vec<(Span, String)>,
}
792impl Display for Keywords {
793    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
794        if let [most @ .., last] = &self.soft[..] {
795            write!(f, "#keywords soft ")?;
796            for (_, word) in most {
797                write!(f, "{word} ")?;
798            }
799            writeln!(f, "{};", last.1)?;
800        }
801        if let [most @ .., last] = &self.hard[..] {
802            write!(f, "#keywords hard ")?;
803            for (_, word) in most {
804                write!(f, "{word} ")?;
805            }
806            writeln!(f, "{};", last.1)?;
807        }
808        Ok(())
809    }
810}
811
/// The set of preambles.
#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Preambles {
    /// The `#keywords` preambles (soft and hard).
    pub keywords: Keywords,
}
817impl Display for Preambles {
818    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
819        writeln!(f, "{}", self.keywords)
820    }
821}
822
/// A TECTA PEG module.
#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct TectaPegModule {
    /// The preambles declared by the module.
    pub preambles: Preambles,
    /// Rule definitions by name; each holds the rules left on the stack after
    /// parsing that definition's right-hand side.
    pub rules: BTreeMap<String, Vec<Rule>>,
}
829impl Display for TectaPegModule {
830    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
831        write!(f, "{}", self.preambles)?;
832        for (rule_name, rules) in &self.rules {
833            write!(f, "{rule_name} =")?;
834            for rule in rules {
835                write!(f, " {rule}")?;
836            }
837            writeln!(f, ";")?;
838        }
839        Ok(())
840    }
841}
842
843/// Parses a list of identifiers.
844pub fn parse_basic_identifier_list(input: &mut parse_input!()) -> Result<Vec<(Span, String)>> {
845    let mut identifiers = vec![];
846    loop {
847        skip_ws(input);
848        if let Some((_, ';')) = input.clone().next() {
849            return Ok(identifiers);
850        }
851        identifiers.push(identifier_or_eof(input)?);
852    }
853}
854
855/// Parses a TECTA PEG module; a set of preambles and rule definitions.
856pub fn parse_module_inner(input: &mut parse_input!()) -> Result<TectaPegModule> {
857    let mut module = TectaPegModule::default();
858    loop {
859        skip_ws(input);
860        // TODO: split preamble into function
861        if let Some((_, '#')) = input.clone().next() {
862            input.next();
863            let (span, name) = identifier_or_eof(input)?;
864            match &name[..] {
865                "keywords" => {
866                    skip_ws(input);
867                    let (span, name) = identifier_or_eof(input)?;
868                    let is_hard = match &name[..] {
869                        "hard" => true,
870                        "soft" => false,
871                        other => {
872                            return Err(ErrorKind::ExpectedFound(
873                                "keyword hardness".into(),
874                                format!("`{other}`"),
875                            )
876                            .with(span));
877                        }
878                    };
879
880                    let colon_span = expect_exactly(input, ":")?;
881                    let specified_keywords = parse_basic_identifier_list(input)?;
882
883                    if specified_keywords.is_empty() {
884                        return Err(ErrorKind::ExpectedFound(
885                            "non-empty keyword list".into(),
886                            "empty list".into(),
887                        )
888                        .with(colon_span));
889                    }
890
891                    let target_keyword_set = if is_hard {
892                        &mut module.preambles.keywords.hard
893                    } else {
894                        &mut module.preambles.keywords.soft
895                    };
896                    if !target_keyword_set.is_empty() {
897                        return Err(ErrorKind::ExistingPreamble(format!(
898                            "keywords {}",
899                            if is_hard { "hard" } else { "soft" }
900                        ))
901                        .with(colon_span));
902                    }
903
904                    *target_keyword_set = specified_keywords;
905                }
906                other => {
907                    return Err(
908                        ErrorKind::ExpectedFound("preamble".into(), format!("`{other}`"))
909                            .with(span),
910                    );
911                }
912            }
913            expect_exactly(input, ";")?;
914        } else if let Some((_, rule_name)) = expect_identifier(input)? {
915            skip_ws(input);
916            let _eq_span = expect_exactly(input, "=")?;
917            skip_ws(input);
918
919            let mut peg_stack = PegStack(vec![]);
920            while let RuleStatus::Continue = next_rule(input, &mut peg_stack)? {
921                skip_ws(input);
922            }
923            skip_ws(input);
924
925            let sequence = peg_stack
926                .0
927                .into_iter()
928                .map(Peg::try_as_rule)
929                .collect::<Result<Vec<_>>>()?;
930            module.rules.insert(rule_name, sequence);
931        } else {
932            break;
933        }
934    }
935    Ok(module)
936}
937
938/// Parses a TECTA PEG module from a string. See [`parse_module`].
939pub fn parse_module(str: &str) -> Result<TectaPegModule> {
940    let end_span = match str.lines().enumerate().last() {
941        Some((index, line)) => Span {
942            start_line: index + 1,
943            end_line: index + 1,
944            start_column: line.len(),
945            end_column: line.len(),
946        },
947        None => Span::default(),
948    };
949    parse_module_inner(&mut ParseInput {
950        chars: SpannedChars::new(str.chars()),
951        end_span,
952    })
953}