Skip to main content

tecta_peg/
lib.rs

1use std::{
2    collections::BTreeMap,
3    fmt::{Debug, Display},
4};
5
6use tecta_lex::{Delimiter as GroupDelimiter, Span, pat_ident_body, pat_ident_start, pat_punct};
7
8/// A delimiter of a group in a token tree, or a sequence.
9#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
10pub enum AnyDelimiter {
11    /// `()`
12    Parenthesis,
13    /// `[]`
14    Bracket,
15    /// `{}`
16    Brace,
17    /// `<>`,
18    AngleBrackets,
19}
20impl AnyDelimiter {
21    pub fn to_group(&self) -> Option<GroupDelimiter> {
22        match self {
23            Self::Parenthesis => Some(GroupDelimiter::Parenthesis),
24            Self::Bracket => Some(GroupDelimiter::Bracket),
25            Self::Brace => Some(GroupDelimiter::Brace),
26            Self::AngleBrackets => None,
27        }
28    }
29    pub fn opener(&self) -> char {
30        match self {
31            Self::Parenthesis => '(',
32            Self::Bracket => '[',
33            Self::Brace => '{',
34            Self::AngleBrackets => '<',
35        }
36    }
37    pub fn closer(&self) -> char {
38        match self {
39            Self::Parenthesis => ')',
40            Self::Bracket => ']',
41            Self::Brace => '}',
42            Self::AngleBrackets => '>',
43        }
44    }
45}
46impl From<GroupDelimiter> for AnyDelimiter {
47    fn from(value: GroupDelimiter) -> Self {
48        match value {
49            GroupDelimiter::Parenthesis => Self::Parenthesis,
50            GroupDelimiter::Bracket => Self::Bracket,
51            GroupDelimiter::Brace => Self::Brace,
52        }
53    }
54}
55
/// A transient control rule; cleared by other control characters.
///
/// Field `0` is the kind of control, field `1` is the span of the character
/// that introduced it.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Control(pub ControlKind, pub Span);

/// A specific variant of control rule.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum ControlKind {
    /// Start of a sequence rule, beginning with `<` and ending with `>`.
    SequenceStart,
    /// Start of a group rule, beginning with one of `(`, `[`, or `{`, and ending with, respectively, `)`, `]`, or `}`.
    /// The payload records which opening delimiter was used.
    GroupStart(GroupDelimiter),
}
68
/// At least some number of times.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum AtLeast {
    /// Matches a rule zero or more times. Delimited by `*`.
    Zero,
    /// Matches a rule one or more times. Delimited by `+`.
    One,
}
/// Renders the operator character that denotes this repetition (`*` or `+`).
impl Display for AtLeast {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let operator = match self {
            AtLeast::Zero => "*",
            AtLeast::One => "+",
        };
        f.write_str(operator)
    }
}
85
/// Repeats a rule a number of times, the repetition choice being decided by the operator used:
/// - `*` repeats zero or more times
/// - `+` repeats one or more times
///
/// The first operand is the element and the second is the separator.
/// For example, `"x" ',' *` matches multiple instances of the keyword `x`, separated by commas.
/// Trailing is enabled with the `~` modifier.
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct RepeatRule {
    /// The rule being repeated (first operand).
    pub element: Box<Rule>,
    /// The rule that goes between elements (second operand).
    pub separator: Box<Rule>,
    /// Minimum repetition count, as selected by `*` or `+`.
    pub at_least: AtLeast,
    /// Set by the `~` modifier.
    /// NOTE(review): presumably permits a trailing separator after the last element — confirm against the matcher.
    pub allow_trailing: bool,
}
100
101/// A grammar rule.
102#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
103pub struct Rule(RuleKind, Span);
104impl Rule {
105    pub fn sequence(rules: Vec<Rule>, span: Span) -> Self {
106        Self(RuleKind::Sequence(rules), span)
107    }
108    pub fn choice(rules: Vec<Rule>, span: Span) -> Self {
109        Self(RuleKind::Choice(rules), span)
110    }
111    pub fn group(delimiter: GroupDelimiter, rule: Rule, span: Span) -> Self {
112        Self(RuleKind::Group(delimiter, Box::new(rule)), span)
113    }
114
115    pub fn repeat(element: Rule, separator: Rule, at_least: AtLeast, span: Span) -> Self {
116        Self(
117            RuleKind::Repeat(RepeatRule {
118                element: Box::new(element),
119                separator: Box::new(separator),
120                at_least,
121                allow_trailing: false,
122            }),
123            span,
124        )
125    }
126    pub fn optional(rule: Rule, span: Span) -> Self {
127        Self(RuleKind::Optional(Box::new(rule)), span)
128    }
129
130    pub fn punctuation(repr: String, span: Span) -> Self {
131        Self(RuleKind::Punctuation(repr), span)
132    }
133    pub fn keyword(repr: String, span: Span) -> Self {
134        Self(RuleKind::Keyword(repr), span)
135    }
136    pub fn other(repr: String, span: Span) -> Self {
137        Self(RuleKind::Other(repr), span)
138    }
139    pub fn builtin(repr: String, span: Span) -> Self {
140        Self(RuleKind::Builtin(repr), span)
141    }
142
143    pub fn peg(self) -> Peg {
144        Peg::Rule(self)
145    }
146}
147impl Debug for Rule {
148    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
149        write!(f, "{:?} @{}", self.0, self.1)
150    }
151}
152impl Display for Rule {
153    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
154        write!(f, "{}", self.0)
155    }
156}
157
/// A specific grammar rule variant.
///
/// Variant order matters: the derived `PartialOrd`/`Ord` compare variants in
/// declaration order.
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum RuleKind {
    /// Matches a sequence of rules, one after another.
    Sequence(Vec<Rule>),
    /// Begins all rules at the same point, using the first one that matches.
    Choice(Vec<Rule>),
    /// Matches inside a token group.
    Group(GroupDelimiter, Box<Rule>),

    /// A [repeating rule][`RepeatRule`], delimited with `*` or `+`.
    Repeat(RepeatRule),

    /// Makes a rule optional (allowed to fail). Delimited with `?`.
    Optional(Box<Rule>),

    /// Matches a punctuation token. Holds the punctuation text.
    Punctuation(String),
    /// Matches a keyword token. Holds the keyword text.
    Keyword(String),

    /// Matches a different rule. Holds that rule's name.
    Other(String),
    /// Matches a built-in rule (denoted with `@`). Holds the built-in's name.
    Builtin(String),
}
184impl RuleKind {
185    pub fn with(self, span: impl Into<Span>) -> Rule {
186        Rule(self, span.into())
187    }
188}
189impl Display for RuleKind {
190    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
191        match self {
192            RuleKind::Sequence(rules) => match &rules[..] {
193                [] => write!(f, "<>"),
194                [most @ .., last] => {
195                    write!(f, "<")?;
196                    for rule in most {
197                        write!(f, "{rule} ")?;
198                    }
199                    write!(f, "{last}>")
200                }
201            },
202            RuleKind::Choice(rules) => match &rules[..] {
203                [] => write!(f, "!"),
204                [most @ .., last] => {
205                    for rule in most {
206                        write!(f, "{rule} | ")?;
207                    }
208                    write!(f, "{last}")
209                }
210            },
211            RuleKind::Group(delimiter, inner) => {
212                write!(f, "{}{}{}", delimiter.opener(), inner, delimiter.closer())
213            }
214            RuleKind::Repeat(RepeatRule {
215                element,
216                separator,
217                at_least,
218                allow_trailing,
219            }) => write!(
220                f,
221                "{} {} {}{}",
222                element,
223                separator,
224                at_least,
225                if *allow_trailing { "~" } else { "" }
226            ),
227            RuleKind::Optional(rule) => write!(f, "{rule}?"),
228            RuleKind::Punctuation(punct) => write!(f, "'{punct}'"),
229            RuleKind::Keyword(kw) => write!(f, "\"{kw}\""),
230            RuleKind::Other(name) => write!(f, "{name}"),
231            RuleKind::Builtin(builtin) => write!(f, "@{builtin}"),
232        }
233    }
234}
235
236/// An element of a PEG grammar stack.
237#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
238pub enum Peg {
239    /// A control element. Should not appear on the stack by the end of parsing.
240    Control(Control),
241    /// A grammar rule element.
242    Rule(Rule),
243}
244impl Peg {
245    pub fn sequence_start(span: Span) -> Self {
246        Self::Control(Control(ControlKind::SequenceStart, span))
247    }
248    pub fn group_start(delimiter: GroupDelimiter, span: Span) -> Self {
249        Self::Control(Control(ControlKind::GroupStart(delimiter), span))
250    }
251
252    pub fn try_as_rule(self) -> Result<Rule> {
253        match self {
254            Peg::Rule(rule) => Ok(rule),
255            Peg::Control(control) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
256        }
257    }
258    pub fn try_as_mut_rule(&mut self) -> Result<&mut Rule> {
259        match self {
260            Peg::Rule(rule) => Ok(rule),
261            Peg::Control(control) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
262        }
263    }
264    pub fn span(&self) -> Span {
265        let (Peg::Control(Control(_, span)) | Peg::Rule(Rule(_, span))) = self;
266        *span
267    }
268}
269
270/// A PEG grammar stack.
271pub struct PegStack(pub Vec<Peg>);
272impl PegStack {
273    pub fn raw_pop_rule(&mut self, operator_span: Span) -> Result<Rule> {
274        match self.0.pop() {
275            Some(Peg::Rule(rule)) => Ok(rule),
276            Some(Peg::Control(control)) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
277            None => Err(ErrorKind::StackEmpty("expected rule".into()).with(operator_span)),
278        }
279    }
280    pub fn take_rule(&mut self, operator_span: Span) -> Result<Rule> {
281        match self.0.pop() {
282            Some(Peg::Rule(Rule(RuleKind::Choice(mut choices), span))) if !choices.is_empty() => {
283                let rule = choices
284                    .pop()
285                    .expect("internal parser error: choices was in fact empty");
286                self.0
287                    .push(Peg::Rule(Rule(RuleKind::Choice(choices), span)));
288                Ok(rule)
289            }
290            Some(Peg::Rule(other_rule)) => Ok(other_rule),
291            Some(Peg::Control(control)) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
292            None => Err(ErrorKind::StackEmpty("expected rule".into()).with(operator_span)),
293        }
294    }
295    pub fn add_rule(&mut self, rule: Rule) {
296        match self.0.pop() {
297            Some(Peg::Rule(Rule(RuleKind::Choice(mut variants), span))) => {
298                if let Some(old_last_variant) = variants.pop() {
299                    let total_span = old_last_variant.1 + span;
300                    if let RuleKind::Sequence(mut sequence) = old_last_variant.0 {
301                        sequence.push(rule);
302                        variants.push(Rule(RuleKind::Sequence(sequence), total_span));
303                    } else {
304                        variants.push(Rule::sequence(vec![old_last_variant, rule], total_span));
305                    }
306                } else {
307                    variants.push(rule);
308                }
309                self.0.push(Rule::choice(variants, span).peg());
310            }
311            Some(other) => {
312                self.0.push(other);
313                self.0.push(rule.peg());
314            }
315            None => {
316                self.0.push(rule.peg());
317            }
318        }
319    }
320    pub fn add_rule_kind(&mut self, kind: RuleKind, span: Span) {
321        self.add_rule(Rule(kind, span));
322    }
323}
324
325/// Input to a PEG parsing function.
326#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
327pub struct ParseInput<I: Iterator<Item = (Span, char)>> {
328    chars: I,
329    end_span: Span,
330}
331impl<I: Iterator<Item = (Span, char)>> ParseInput<I> {
332    pub fn new(chars: I, end_span: Span) -> Self {
333        Self { chars, end_span }
334    }
335    fn eof(&self) -> Error {
336        Error::eof(self.end_span)
337    }
338}
339impl<I: Iterator<Item = (Span, char)>> Iterator for ParseInput<I> {
340    type Item = (Span, char);
341    fn next(&mut self) -> Option<Self::Item> {
342        self.chars.next()
343    }
344}
345
/// Wraps a character iterator, adding span information to each character.
///
/// Positions are 1-based: a fresh wrapper starts at line 1, column 1.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct SpannedChars<I: Iterator<Item = char>> {
    iterator: I,
    current_line: usize,
    current_column: usize,
}
impl<I: Iterator<Item = char>> SpannedChars<I> {
    /// Starts tracking positions for `iterator` at line 1, column 1.
    pub fn new(iterator: I) -> Self {
        SpannedChars {
            iterator,
            current_line: 1,
            current_column: 1,
        }
    }
}
// A derived `Default` would start at line 0 / column 0 and disagree with `new`;
// delegating keeps both construction paths on the same 1-based origin.
impl<I: Iterator<Item = char> + Default> Default for SpannedChars<I> {
    fn default() -> Self {
        Self::new(I::default())
    }
}
362impl<I: Iterator<Item = char>> Iterator for SpannedChars<I> {
363    type Item = (Span, char);
364    fn next(&mut self) -> Option<Self::Item> {
365        let next = self.iterator.next()?;
366        let span = Span {
367            start_line: self.current_line,
368            end_line: self.current_line,
369            start_column: self.current_column,
370            end_column: self.current_column,
371        };
372        if next == '\n' {
373            self.current_line += 1;
374            self.current_column = 1;
375        } else {
376            self.current_column += 1;
377        }
378        Some((span, next))
379    }
380}
381
// Expands to the parameter type shared by the parsing functions below:
// a `ParseInput` over any cloneable iterator of spanned characters.
// The `Clone` bound is what lets those functions peek via `input.clone().next()`.
macro_rules! parse_input {
    () => {
        ParseInput<impl Iterator<Item = (Span, char)> + Clone>
    };
}
387
388#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
389enum ErrorKind {
390    ExpectedFound(String, String),
391    StackEmpty(String),
392    StrayControl(ControlKind),
393    EOF,
394    InvalidCloser {
395        expected: AnyDelimiter,
396        got: AnyDelimiter,
397    },
398    ExistingPreamble(String),
399}
400impl ErrorKind {
401    fn with(self, span: impl Into<Span>) -> Error {
402        Error(self, span.into())
403    }
404}
405impl Display for ErrorKind {
406    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
407        match self {
408            ErrorKind::ExpectedFound(expected, found) => {
409                write!(f, "expected {expected}, found {found}")
410            }
411            ErrorKind::StackEmpty(expected) => write!(f, "stack empty; {expected}"),
412            ErrorKind::StrayControl(control) => {
413                write!(f, "expected a rule, got a control ({control:?})")
414            }
415            ErrorKind::EOF => write!(f, "unexpected end of file"),
416            ErrorKind::InvalidCloser { expected, got } => write!(
417                f,
418                "expected {} to match {}, got {}",
419                expected.closer(),
420                expected.opener(),
421                got.closer()
422            ),
423            ErrorKind::ExistingPreamble(preamble) => {
424                write!(f, "preamble {preamble} already exists")
425            }
426        }
427    }
428}
429impl core::error::Error for ErrorKind {}
430
431/// The error type.
432#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
433pub struct Error(ErrorKind, Span);
434impl Error {
435    pub fn span(&self) -> Span {
436        self.1
437    }
438    fn eof(end_span: Span) -> Self {
439        ErrorKind::EOF.with(end_span)
440    }
441}
442impl Display for Error {
443    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
444        write!(f, "{} (at {})", self.0, self.1)
445    }
446}
447impl core::error::Error for Error {}
448pub type Result<T> = core::result::Result<T, Error>;
449
450/// Skips over as many whitespace characters as possible.
451///
452/// Used between many parsing functions.
453pub fn skip_ws(input: &mut parse_input!()) {
454    while let Some((_, ch)) = input.clone().next() {
455        if !ch.is_whitespace() {
456            return;
457        }
458        input.next();
459    }
460}
461
462/// Attempts to parse an identifier. If already at EOF, `Ok(None)` is returned.
463pub fn expect_identifier(input: &mut parse_input!()) -> Result<Option<(Span, String)>> {
464    let (mut span, ch) = match input.next() {
465        Some((span, ch @ pat_ident_start!())) => (span, ch),
466        Some((span, other)) => {
467            return Err(ErrorKind::ExpectedFound(
468                "identifier".to_owned(),
469                format!("character `{other}`"),
470            )
471            .with(span));
472        }
473        None => return Ok(None),
474    };
475    let mut output = String::from(ch);
476    consume_identifier_rest(input, &mut span, &mut output);
477    Ok(Some((span, output)))
478}
479
480/// Similar to `expect_identifier`, but errors on EOF.
481pub fn identifier_or_eof(input: &mut parse_input!()) -> Result<(Span, String)> {
482    expect_identifier(input)?.ok_or(input.eof())
483}
484
485/// After parsing the first character of an identifier, this can be used to parse the rest of the characters.
486pub fn consume_identifier_rest(
487    input: &mut parse_input!(),
488    into_span: &mut Span,
489    into: &mut String,
490) {
491    while let Some((span, ch @ pat_ident_body!())) = input.clone().next() {
492        input.next();
493        *into_span += span;
494        into.push(ch);
495    }
496}
497
498/// Expects some literal text content to appear in the character stream.
499pub fn expect_exactly(input: &mut parse_input!(), string: &str) -> Result<Option<Span>> {
500    let mut collected = String::new();
501    let mut acc_span = None;
502    for ch in string.chars() {
503        let next = input.next();
504        if let Some((span, test_ch)) = next {
505            acc_span = match acc_span {
506                Some(acc_span) => Some(acc_span + span),
507                None => Some(span),
508            };
509            collected.push(test_ch);
510            if test_ch != ch {
511                return Err(ErrorKind::ExpectedFound(
512                    format!("`{string}`"),
513                    format!("`{collected}`"),
514                )
515                .with(span));
516            }
517        } else {
518            return Err(input.eof());
519        }
520    }
521    Ok(acc_span)
522}
523
/// Whether or not rule parsing should continue.
pub enum RuleStatus {
    /// A rule element was processed; more may follow.
    Continue,
    /// The rule terminator (`;`) was consumed.
    End,
}
529
530/// Parses the next rule, modifying the stack accordingly. Returns whether or not rule parsing should continue.
531pub fn next_rule(input: &mut parse_input!(), stack: &mut PegStack) -> Result<RuleStatus> {
532    let (mut peg_span, ch) = input.next().ok_or(input.eof())?;
533    match ch {
534        '"' => {
535            let mut keyword = String::new();
536            loop {
537                let (span, ch) = input.next().ok_or(input.eof())?;
538                peg_span += span;
539                if ch == '"' {
540                    break;
541                }
542                keyword.push(ch);
543            }
544            stack.add_rule(Rule::keyword(keyword, peg_span));
545        }
546
547        '\'' => {
548            let mut punct = String::new();
549            loop {
550                let (span, ch) = input.next().ok_or(input.eof())?;
551                if ch == '\'' {
552                    peg_span += span;
553                    break;
554                }
555                if !matches!(ch, pat_punct!()) {
556                    return Err(ErrorKind::ExpectedFound(
557                        "punctuation".to_owned(),
558                        format!("character `{ch}`"),
559                    )
560                    .with(span));
561                }
562                peg_span += span;
563                punct.push(ch);
564            }
565            stack.add_rule(Rule::punctuation(punct, peg_span));
566        }
567
568        '*' => {
569            let separator = stack.raw_pop_rule(peg_span)?;
570            let element = stack.take_rule(peg_span)?;
571            let other_span = element.1 + separator.1;
572            stack.add_rule(Rule::repeat(
573                element,
574                separator,
575                AtLeast::Zero,
576                peg_span + other_span,
577            ));
578        }
579        '+' => {
580            let separator = stack.raw_pop_rule(peg_span)?;
581            let element = stack.take_rule(peg_span)?;
582            let other_span = element.1 + separator.1;
583            stack.add_rule(Rule::repeat(
584                element,
585                separator,
586                AtLeast::One,
587                peg_span + other_span,
588            ));
589        }
590        '~' => {
591            let Rule(kind, span) = stack.take_rule(peg_span)?;
592            let RuleKind::Repeat(mut repeat) = kind else {
593                return Err(ErrorKind::ExpectedFound(
594                    "repetition rule".to_owned(),
595                    "other rule".to_owned(),
596                )
597                .with(peg_span));
598            };
599            repeat.allow_trailing = true;
600            stack.add_rule_kind(RuleKind::Repeat(repeat), peg_span + span);
601        }
602        '?' => {
603            let element = stack.take_rule(peg_span)?;
604            let element_span = element.1;
605            stack.add_rule(Rule::optional(element, peg_span + element_span));
606        }
607
608        '.' => stack.add_rule(Rule::sequence(vec![], peg_span)),
609        '<' => stack.0.push(Peg::sequence_start(peg_span)),
610        '(' => stack
611            .0
612            .push(Peg::group_start(GroupDelimiter::Parenthesis, peg_span)),
613        '[' => stack
614            .0
615            .push(Peg::group_start(GroupDelimiter::Bracket, peg_span)),
616        '{' => stack
617            .0
618            .push(Peg::group_start(GroupDelimiter::Brace, peg_span)),
619        '>' => {
620            let mut sequence = vec![];
621            let mut total_span = peg_span;
622            loop {
623                let peg = stack.0.pop().ok_or(
624                    ErrorKind::StackEmpty("missing start control".to_owned()).with(peg_span),
625                )?;
626                total_span += peg.span();
627                match peg {
628                    Peg::Rule(rule) => {
629                        total_span += rule.1;
630                        sequence.push(rule);
631                    }
632                    Peg::Control(Control(ControlKind::GroupStart(delimiter), span)) => {
633                        return Err(ErrorKind::ExpectedFound(
634                            "sequence start control (`<`)".into(),
635                            format!("group start control ({})", delimiter.opener()),
636                        )
637                        .with(span));
638                    }
639                    Peg::Control(Control(ControlKind::SequenceStart, span)) => {
640                        sequence.reverse();
641                        stack.add_rule(Rule::sequence(sequence, total_span + span));
642                        break;
643                    }
644                }
645            }
646        }
647        ch @ (')' | ']' | '}') => {
648            let closer = match ch {
649                ')' => GroupDelimiter::Parenthesis,
650                ']' => GroupDelimiter::Bracket,
651                '}' => GroupDelimiter::Brace,
652                _ => unreachable!(),
653            };
654            let mut sequence = vec![];
655            let mut inner_span = Span::default();
656            loop {
657                let peg = stack.0.pop().ok_or(
658                    ErrorKind::StackEmpty("missing start control".to_owned()).with(peg_span),
659                )?;
660                peg_span += peg.span();
661                match peg {
662                    Peg::Rule(rule) => {
663                        inner_span += rule.1;
664                        sequence.push(rule);
665                    }
666                    Peg::Control(Control(ControlKind::GroupStart(opener), span)) => {
667                        peg_span += span + inner_span;
668                        if opener == closer {
669                            sequence.reverse();
670                            stack.add_rule(Rule::group(
671                                opener,
672                                if sequence.len() == 1 {
673                                    sequence.pop().unwrap()
674                                } else {
675                                    Rule::sequence(sequence, inner_span)
676                                },
677                                peg_span,
678                            ));
679                            break;
680                        } else {
681                            return Err(ErrorKind::InvalidCloser {
682                                expected: opener.into(),
683                                got: closer.into(),
684                            }
685                            .with(span));
686                        }
687                    }
688                    Peg::Control(Control(ControlKind::SequenceStart, span)) => {
689                        return Err(ErrorKind::InvalidCloser {
690                            expected: AnyDelimiter::AngleBrackets,
691                            got: closer.into(),
692                        }
693                        .with(span));
694                    }
695                }
696            }
697        }
698
699        '|' => {
700            let after = parse_single_grammar(input)?;
701            let first = stack.raw_pop_rule(peg_span)?;
702            let rule = match first {
703                Rule(RuleKind::Choice(mut choices), span) => {
704                    choices.push(after);
705                    Rule::choice(choices, peg_span + span)
706                }
707                other => {
708                    let mut first_variant_span = other.1;
709                    let mut first_variant_sequence = vec![other];
710                    while let Some(peg) = stack.0.pop() {
711                        match peg {
712                            Peg::Control(control) => {
713                                stack.0.push(Peg::Control(control));
714                                peg_span += control.1;
715                                break;
716                            }
717                            Peg::Rule(rule) => {
718                                first_variant_span += rule.1;
719                                first_variant_sequence.push(rule);
720                            }
721                        }
722                    }
723                    first_variant_sequence.reverse();
724                    let first_variant_rule = if first_variant_sequence.len() == 1 {
725                        first_variant_sequence.pop().unwrap()
726                    } else {
727                        Rule::sequence(first_variant_sequence, first_variant_span)
728                    };
729                    let after_span = after.1;
730                    Rule::choice(
731                        vec![first_variant_rule, after],
732                        first_variant_span + peg_span + after_span,
733                    )
734                }
735            };
736            stack.add_rule(rule);
737        }
738
739        '@' => {
740            let (span, builtin) = expect_identifier(input)?.ok_or(ErrorKind::EOF.with(peg_span))?;
741            stack.add_rule(Rule::builtin(builtin, span));
742        }
743        ';' => return Ok(RuleStatus::End),
744
745        ch @ pat_ident_start!() => {
746            let mut rule_name = String::from(ch);
747            consume_identifier_rest(input, &mut peg_span, &mut rule_name);
748            stack.add_rule(Rule::other(rule_name, peg_span));
749        }
750
751        other => {
752            return Err(ErrorKind::ExpectedFound(
753                "rule".to_owned(),
754                format!("character `{other}`"),
755            )
756            .with(peg_span));
757        }
758    }
759    Ok(RuleStatus::Continue)
760}
761
762/// Parses a full grammar;
763/// creates a new stack and calls `next_rule` repeatedly until no control grammars are on the stack and the stack is not empty.
764pub fn parse_single_grammar(input: &mut parse_input!()) -> Result<Rule> {
765    let mut stack = PegStack(vec![]);
766    while stack.0.is_empty()
767        || stack
768            .0
769            .iter()
770            .any(|peg: &Peg| matches!(&peg, Peg::Control(_)))
771    {
772        skip_ws(input);
773        if let RuleStatus::End = next_rule(input, &mut stack)? {
774            return Err(input.eof());
775        }
776    }
777    Ok(stack.0.pop().unwrap().try_as_rule()?)
778}
779
/// The hardness of a keyword.
#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum KeywordHardness {
    /// Hard keywords present when an identifier is expected will fail to parse.
    Hard,
    /// Soft keywords are not fully reserved identifiers, they instead only have a different meaning when directly expected.
    /// Soft keywords can be used as identifiers.
    #[default]
    Soft,
}
/// Renders the hardness as it is written in the `#keywords` preamble (`hard` or `soft`).
impl Display for KeywordHardness {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            KeywordHardness::Soft => "soft",
            KeywordHardness::Hard => "hard",
        };
        f.write_str(label)
    }
}
798
799/// A keyword list; part of the `#keywords` preamble.
800#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
801pub enum KeywordList {
802    /// Keywords that appear within a rule are added to the keyword list automatically.
803    #[default]
804    Auto,
805    /// Keywords are declared ahead of time; keywords used outside the list constitute an error.
806    Manual {
807        keywords: Vec<(Span, String)>,
808        /// If true, a keyword in the list that is not used inside a rule constitutes an error.
809        strict: bool,
810    },
811}
812impl Display for KeywordList {
813    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
814        match self {
815            Self::Auto => write!(f, " auto"),
816            Self::Manual { keywords, strict } => {
817                if *strict {
818                    write!(f, " strict")?;
819                }
820                write!(f, ": ")?;
821                match &keywords[..] {
822                    [] => Ok(()),
823                    [most @ .., last] => {
824                        for name in most {
825                            write!(f, "{} ", name.1)?;
826                        }
827                        write!(f, "{}", last.1)
828                    }
829                }
830            }
831        }
832    }
833}
834
835/// A `#keywords` preamble.
836#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
837pub struct Keywords {
838    pub list: KeywordList,
839    pub hardness: KeywordHardness,
840}
841impl Display for Keywords {
842    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
843        write!(f, "#keywords {}{};", self.hardness, self.list)
844    }
845}
846
847/// The set of preambles.
848#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
849pub struct Preambles {
850    pub keywords: Keywords,
851}
852impl Display for Preambles {
853    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
854        writeln!(f, "{}", self.keywords)
855    }
856}
857
/// A TECTA PEG module.
///
/// Consists of the module's preambles and its rule definitions.
#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct TectaPegModule {
    /// The preambles (`#...;` items) of the module.
    pub preambles: Preambles,
    /// Rule definitions keyed by rule name; each value is the sequence of rules making up the
    /// right-hand side of `name = ...`. Being a `BTreeMap`, iteration (and therefore the
    /// `Display` output) is ordered by rule name rather than by definition order.
    pub rules: BTreeMap<String, Vec<Rule>>,
}
864impl Display for TectaPegModule {
865    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
866        write!(f, "{}", self.preambles)?;
867        for (rule_name, rules) in &self.rules {
868            write!(f, "{rule_name} =")?;
869            for rule in rules {
870                write!(f, " {rule}")?;
871            }
872            writeln!(f, ";")?;
873        }
874        Ok(())
875    }
876}
877
878/// Parses a list of identifiers.
879pub fn parse_basic_identifier_list(input: &mut parse_input!()) -> Result<Vec<(Span, String)>> {
880    let mut identifiers = vec![];
881    loop {
882        skip_ws(input);
883        if let Some((_, ';')) = input.clone().next() {
884            return Ok(identifiers);
885        }
886        identifiers.push(identifier_or_eof(input)?);
887    }
888}
889
890/// Parses a TECTA PEG module; a set of preambles and rule definitions.
891pub fn parse_module_inner(input: &mut parse_input!()) -> Result<TectaPegModule> {
892    let mut module = TectaPegModule::default();
893    let mut keywords_preamble_present = false;
894    loop {
895        skip_ws(input);
896        // TODO: split preamble into function
897        if let Some((_, '#')) = input.clone().next() {
898            input.next();
899            let (span, name) = identifier_or_eof(input)?;
900            match &name[..] {
901                "keywords" => {
902                    if keywords_preamble_present {
903                        return Err(ErrorKind::ExistingPreamble(name).with(span));
904                    }
905                    skip_ws(input);
906                    let (span, name) = identifier_or_eof(input)?;
907                    let hardness = match &name[..] {
908                        "hard" => KeywordHardness::Hard,
909                        "soft" => KeywordHardness::Soft,
910                        other => {
911                            return Err(ErrorKind::ExpectedFound(
912                                "keyword hardness".into(),
913                                format!("`{other}`"),
914                            )
915                            .with(span));
916                        }
917                    };
918                    let list = if let Some((_, ':')) = input.clone().next() {
919                        input.next();
920                        KeywordList::Manual {
921                            keywords: parse_basic_identifier_list(input)?,
922                            strict: false,
923                        }
924                    } else {
925                        skip_ws(input);
926                        let (span, name) = identifier_or_eof(input)?;
927                        match &name[..] {
928                            "auto" => KeywordList::Auto,
929                            "strict" => {
930                                expect_exactly(input, ":")?;
931                                KeywordList::Manual {
932                                    keywords: parse_basic_identifier_list(input)?,
933                                    strict: true,
934                                }
935                            }
936                            other => {
937                                return Err(ErrorKind::ExpectedFound(
938                                    "`auto` or `strict`".into(),
939                                    format!("`{other}`"),
940                                )
941                                .with(span));
942                            }
943                        }
944                    };
945                    expect_exactly(input, ";")?;
946                    module.preambles.keywords = Keywords { list, hardness };
947                    keywords_preamble_present = true;
948                }
949                other => {
950                    return Err(
951                        ErrorKind::ExpectedFound("preamble".into(), format!("`{other}`"))
952                            .with(span),
953                    );
954                }
955            }
956        } else if let Some((_, rule_name)) = expect_identifier(input)? {
957            skip_ws(input);
958            let _eq_span = expect_exactly(input, "=")?;
959            skip_ws(input);
960
961            let mut peg_stack = PegStack(vec![]);
962            while let RuleStatus::Continue = next_rule(input, &mut peg_stack)? {
963                skip_ws(input);
964            }
965            skip_ws(input);
966
967            let sequence = peg_stack
968                .0
969                .into_iter()
970                .map(Peg::try_as_rule)
971                .collect::<Result<Vec<_>>>()?;
972            module.rules.insert(rule_name, sequence);
973        } else {
974            break;
975        }
976    }
977    Ok(module)
978}
979
980/// Parses a TECTA PEG module from a string. See [`parse_module`].
981pub fn parse_module(str: &str) -> Result<TectaPegModule> {
982    let end_span = match str.lines().enumerate().last() {
983        Some((index, line)) => Span {
984            start_line: index + 1,
985            end_line: index + 1,
986            start_column: line.len(),
987            end_column: line.len(),
988        },
989        None => Span::default(),
990    };
991    parse_module_inner(&mut ParseInput {
992        chars: SpannedChars::new(str.chars()),
993        end_span,
994    })
995}