Skip to main content

tecta_peg/
lib.rs

1use std::{
2    collections::BTreeMap,
3    fmt::{Debug, Display},
4};
5
6use tecta_lex::{
7    Delimiter as GroupDelimiter, Span, Spanned, SpanningChars, pat_ident_body, pat_ident_start,
8    pat_punct,
9};
10
11/// A delimiter of a group in a token tree, or a sequence.
12#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
13pub enum AnyDelimiter {
14    /// `()`
15    Parenthesis,
16    /// `[]`
17    Bracket,
18    /// `{}`
19    Brace,
20    /// `<>`,
21    AngleBrackets,
22}
23impl AnyDelimiter {
24    pub fn to_group(&self) -> Option<GroupDelimiter> {
25        match self {
26            Self::Parenthesis => Some(GroupDelimiter::Parenthesis),
27            Self::Bracket => Some(GroupDelimiter::Bracket),
28            Self::Brace => Some(GroupDelimiter::Brace),
29            Self::AngleBrackets => None,
30        }
31    }
32    pub fn opener(&self) -> char {
33        match self {
34            Self::Parenthesis => '(',
35            Self::Bracket => '[',
36            Self::Brace => '{',
37            Self::AngleBrackets => '<',
38        }
39    }
40    pub fn closer(&self) -> char {
41        match self {
42            Self::Parenthesis => ')',
43            Self::Bracket => ']',
44            Self::Brace => '}',
45            Self::AngleBrackets => '>',
46        }
47    }
48}
49impl From<GroupDelimiter> for AnyDelimiter {
50    fn from(value: GroupDelimiter) -> Self {
51        match value {
52            GroupDelimiter::Parenthesis => Self::Parenthesis,
53            GroupDelimiter::Bracket => Self::Bracket,
54            GroupDelimiter::Brace => Self::Brace,
55        }
56    }
57}
58
59/// A transient control rule; cleared by other control characters.
60#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
61pub struct Control(pub ControlKind, pub Span);
62impl Display for Control {
63    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
64        write!(f, "{}", self.0)
65    }
66}
67
68/// A specific variant of control rule.
69#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
70pub enum ControlKind {
71    /// Start of a sequence rule, beginning with `<` and ending with `>`.
72    SequenceStart,
73    /// Start of a group rule, beginning with one of `(`, `[`, or `{`, and ending with, respectively, `)`, `]`, or `}`.
74    GroupStart(GroupDelimiter),
75}
76impl Display for ControlKind {
77    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
78        match self {
79            ControlKind::SequenceStart => write!(f, "<"),
80            ControlKind::GroupStart(delimiter) => write!(f, "{}", delimiter.opener()),
81        }
82    }
83}
84
85/// At least some number of times.
86#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
87pub enum AtLeast {
88    /// Matches a rule zero or more times. Delimited by `*`.
89    Zero,
90    /// Matches a rule one or more times. Delimited by `+`.
91    One,
92}
93impl Display for AtLeast {
94    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
95        match self {
96            Self::One => write!(f, "+"),
97            Self::Zero => write!(f, "*"),
98        }
99    }
100}
101
102/// Repeats a rule a number of times, the repetition choice being decided by the operator used:
103/// - `*` repeats zero or more times
104/// - `+` repeats one or more times
105///
106/// The first operand is the element and the second is the separator.
107/// For example, `"x" ',' *` matches multiple instances of the keyword `x`, separated by commas.
108/// Trailing is enabled with the `~` modifier.
109#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
110pub struct RepeatRule {
111    pub element: Box<Rule>,
112    pub separator: Box<Rule>,
113    pub at_least: AtLeast,
114    pub allow_trailing: bool,
115}
116
117/// A grammar rule.
118#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
119pub struct Rule(pub RuleKind, pub Span);
120impl Rule {
121    pub fn sequence(rules: Vec<Rule>, weak: bool, span: Span) -> Self {
122        Self(RuleKind::Sequence { rules, weak }, span)
123    }
124    pub fn record(rules: Vec<(Spanned<Option<String>>, Rule)>, span: Span) -> Self {
125        Self(RuleKind::Record(rules), span)
126    }
127
128    pub fn choice(rules: Vec<Rule>, span: Span) -> Self {
129        Self(RuleKind::Choice(rules), span)
130    }
131    pub fn named_choice(rules: Vec<(Spanned<String>, Rule)>, span: Span) -> Self {
132        Self(RuleKind::NamedChoice(rules), span)
133    }
134
135    pub fn group(delimiter: GroupDelimiter, rule: Rule, span: Span) -> Self {
136        Self(RuleKind::Group(delimiter, Box::new(rule)), span)
137    }
138
139    pub fn repeat(element: Rule, separator: Rule, at_least: AtLeast, span: Span) -> Self {
140        Self(
141            RuleKind::Repeat(RepeatRule {
142                element: Box::new(element),
143                separator: Box::new(separator),
144                at_least,
145                allow_trailing: false,
146            }),
147            span,
148        )
149    }
150
151    pub fn optional(rule: Rule, span: Span) -> Self {
152        Self(RuleKind::Optional(Box::new(rule)), span)
153    }
154    pub fn boxed(rule: Rule, span: Span) -> Self {
155        Self(RuleKind::Boxed(Box::new(rule)), span)
156    }
157
158    pub fn punctuation(repr: String, span: Span) -> Self {
159        Self(RuleKind::Punctuation(repr), span)
160    }
161    pub fn keyword(repr: String, span: Span) -> Self {
162        Self(RuleKind::Keyword(repr), span)
163    }
164    pub fn other(repr: String, span: Span) -> Self {
165        Self(RuleKind::Other(repr), span)
166    }
167    pub fn builtin(repr: String, span: Span) -> Self {
168        Self(RuleKind::Builtin(repr), span)
169    }
170
171    pub fn peg(self) -> Peg {
172        Peg::Rule(self)
173    }
174}
175impl Debug for Rule {
176    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
177        write!(f, "{:?} @{}", self.0, self.1)
178    }
179}
180impl Display for Rule {
181    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
182        write!(f, "{}", self.0)
183    }
184}
185
186/// A specific grammar rule variant.
187#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
188pub enum RuleKind {
189    /// Matches a sequence of rules, one after another.
190    Sequence { rules: Vec<Rule>, weak: bool },
191    /// Like a sequence, but every rule can have a name.
192    Record(Vec<(Spanned<Option<String>>, Rule)>),
193
194    /// Begins all rules at the same point, using the first one that matches.
195    Choice(Vec<Rule>),
196    /// Like a choice, but every rule can have a name.
197    NamedChoice(Vec<(Spanned<String>, Rule)>),
198
199    /// Matches inside a token group.
200    Group(GroupDelimiter, Box<Rule>),
201
202    /// A [repeating rule][`RepeatRule`], delimited with `*` or `+`.
203    Repeat(RepeatRule),
204
205    /// Makes a rule optional (allowed to fail). Delimited with `?`.
206    Optional(Box<Rule>),
207    /// Boxes a rule (permits recursion). Delimited with `^`.
208    Boxed(Box<Rule>),
209
210    /// Matches a punctuation token.
211    Punctuation(String),
212    /// Matches a keyword token.
213    Keyword(String),
214
215    /// Matches a different rule.
216    Other(String),
217    /// Matches a built-in rule (denoted with `@`).
218    Builtin(String),
219}
220impl RuleKind {
221    pub fn with(self, span: impl Into<Span>) -> Rule {
222        Rule(self, span.into())
223    }
224}
225impl Display for RuleKind {
226    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
227        match self {
228            RuleKind::Sequence { rules, weak } => match &rules[..] {
229                [] => {
230                    if *weak {
231                        Ok(())
232                    } else {
233                        write!(f, "<>")
234                    }
235                }
236                [most @ .., last] => {
237                    if !*weak {
238                        write!(f, "<")?;
239                    }
240                    for rule in most {
241                        write!(f, "{rule} ")?;
242                    }
243                    write!(f, "{last}")?;
244                    if *weak {
245                        Ok(())
246                    } else {
247                        write!(f, ">")
248                    }
249                }
250            },
251            RuleKind::Record(fields) => {
252                write!(f, "&{{ ")?;
253                for (Spanned(_, name), rule) in fields {
254                    let name = match name {
255                        Some(name) => name,
256                        None => "!",
257                    };
258                    write!(f, "{name}: {rule}; ")?;
259                }
260                write!(f, "}}")
261            }
262
263            RuleKind::Choice(rules) => match &rules[..] {
264                [] => write!(f, "!"),
265                [most @ .., last] => {
266                    write!(f, "<")?;
267                    for rule in most {
268                        write!(f, "{rule} | ")?;
269                    }
270                    write!(f, "{last}>")
271                }
272            },
273            RuleKind::NamedChoice(rules) => {
274                write!(f, "&[ ")?;
275                for (Spanned(_, name), rule) in rules {
276                    write!(f, "{name}: {rule}; ")?;
277                }
278                write!(f, "]")
279            }
280
281            RuleKind::Group(delimiter, inner) => {
282                write!(f, "{}{}{}", delimiter.opener(), inner, delimiter.closer())
283            }
284            RuleKind::Repeat(RepeatRule {
285                element,
286                separator,
287                at_least,
288                allow_trailing,
289            }) => write!(
290                f,
291                "{} {} {}{}",
292                element,
293                separator,
294                at_least,
295                if *allow_trailing { "~" } else { "" }
296            ),
297            RuleKind::Optional(rule) => write!(f, "{rule}?"),
298            RuleKind::Boxed(rule) => write!(f, "{rule}^"),
299            RuleKind::Punctuation(punct) => write!(f, "'{punct}'"),
300            RuleKind::Keyword(kw) => write!(f, "\"{kw}\""),
301            RuleKind::Other(name) => write!(f, "{name}"),
302            RuleKind::Builtin(builtin) => write!(f, "@{builtin}"),
303        }
304    }
305}
306
307/// An element of a PEG grammar stack.
308#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
309pub enum Peg {
310    /// A control element. Should not appear on the stack by the end of parsing.
311    Control(Control),
312    /// A grammar rule element.
313    Rule(Rule),
314}
315impl Peg {
316    pub fn sequence_start(span: Span) -> Self {
317        Self::Control(Control(ControlKind::SequenceStart, span))
318    }
319    pub fn group_start(delimiter: GroupDelimiter, span: Span) -> Self {
320        Self::Control(Control(ControlKind::GroupStart(delimiter), span))
321    }
322
323    pub fn try_as_rule(self) -> Result<Rule> {
324        match self {
325            Peg::Rule(rule) => Ok(rule),
326            Peg::Control(control) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
327        }
328    }
329    pub fn try_as_mut_rule(&mut self) -> Result<&mut Rule> {
330        match self {
331            Peg::Rule(rule) => Ok(rule),
332            Peg::Control(control) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
333        }
334    }
335    pub fn span(&self) -> Span {
336        let (Peg::Control(Control(_, span)) | Peg::Rule(Rule(_, span))) = self;
337        *span
338    }
339}
340impl Display for Peg {
341    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
342        match self {
343            Peg::Control(control) => write!(f, "{control}"),
344            Peg::Rule(rule) => write!(f, "{rule}"),
345        }
346    }
347}
348
349/// A PEG grammar stack.
350pub struct PegStack(pub Vec<Peg>);
351impl PegStack {
352    pub fn raw_pop_rule(&mut self, operator_span: Span) -> Result<Rule> {
353        match self.0.pop() {
354            Some(Peg::Rule(rule)) => Ok(rule),
355            Some(Peg::Control(control)) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
356            None => Err(ErrorKind::StackEmpty("expected rule".into()).with(operator_span)),
357        }
358    }
359    pub fn take_rule(&mut self, operator_span: Span) -> Result<Rule> {
360        match self.0.pop() {
361            Some(Peg::Rule(Rule(RuleKind::Choice(mut choices), span))) if !choices.is_empty() => {
362                let rule = match choices
363                    .pop()
364                    .expect("internal parser error: choices was in fact empty")
365                {
366                    Rule(
367                        RuleKind::Sequence {
368                            mut rules,
369                            weak: true,
370                        },
371                        span,
372                    ) => {
373                        let rule = rules.pop().ok_or(
374                            ErrorKind::StackEmpty(
375                                "no more rules to take from choice variant".to_owned(),
376                            )
377                            .with(operator_span),
378                        )?;
379                        choices.push(Rule(RuleKind::Sequence { rules, weak: true }, span));
380                        rule
381                    }
382                    other => {
383                        choices.push(Rule(
384                            RuleKind::Sequence {
385                                rules: vec![],
386                                weak: true,
387                            },
388                            span,
389                        ));
390                        other
391                    }
392                };
393
394                self.0
395                    .push(Peg::Rule(Rule(RuleKind::Choice(choices), span)));
396                Ok(rule)
397            }
398            Some(Peg::Rule(other_rule)) => Ok(other_rule),
399            Some(Peg::Control(control)) => Err(ErrorKind::StrayControl(control.0).with(control.1)),
400            None => Err(ErrorKind::StackEmpty("expected rule".into()).with(operator_span)),
401        }
402    }
403    pub fn add_rule(&mut self, rule: Rule) {
404        match self.0.pop() {
405            Some(Peg::Rule(Rule(RuleKind::Choice(mut variants), span))) => {
406                if let Some(old_last_variant) = variants.pop() {
407                    let total_span = old_last_variant.1 + span;
408                    if let RuleKind::Sequence {
409                        mut rules,
410                        weak: true,
411                    } = old_last_variant.0
412                    {
413                        rules.push(rule);
414                        variants.push(Rule(RuleKind::Sequence { rules, weak: true }, total_span));
415                    } else {
416                        variants.push(Rule::sequence(
417                            vec![old_last_variant, rule],
418                            true,
419                            total_span,
420                        ));
421                    }
422                } else {
423                    variants.push(rule);
424                }
425                self.0.push(Rule::choice(variants, span).peg());
426            }
427            Some(other) => {
428                self.0.push(other);
429                self.0.push(rule.peg());
430            }
431            None => {
432                self.0.push(rule.peg());
433            }
434        }
435    }
436    pub fn add_rule_kind(&mut self, kind: RuleKind, span: Span) {
437        self.add_rule(Rule(kind, span));
438    }
439}
440impl Display for PegStack {
441    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
442        let [most @ .., last] = &self.0[..] else {
443            return Ok(());
444        };
445        for peg in most {
446            write!(f, "{peg} ")?;
447        }
448        write!(f, "{last}")
449    }
450}
451
452/// Input to a PEG parsing function.
453#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
454pub struct ParseInput<I: Iterator<Item = Spanned<char>>> {
455    chars: I,
456    end_span: Span,
457}
458impl<I: Iterator<Item = Spanned<char>>> ParseInput<I> {
459    pub fn new(chars: I, end_span: Span) -> Self {
460        Self { chars, end_span }
461    }
462    pub fn current_span(&self) -> Span
463    where
464        I: Clone,
465    {
466        match self.chars.clone().next() {
467            Some(Spanned(span, _)) => span,
468            None => self.end_span,
469        }
470    }
471    fn eof(&self) -> Error {
472        Error::eof(self.end_span)
473    }
474}
475impl<I: Iterator<Item = Spanned<char>>> Iterator for ParseInput<I> {
476    type Item = Spanned<char>;
477    fn next(&mut self) -> Option<Self::Item> {
478        self.chars.next()
479    }
480}
481
482macro_rules! parse_input {
483    () => {
484        ParseInput<impl Iterator<Item = Spanned<char>> + Clone>
485    };
486}
487
488#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
489enum ErrorKind {
490    ExpectedFound(String, String),
491    StackEmpty(String),
492    StrayControl(ControlKind),
493    EOF,
494    InvalidCloser {
495        expected: AnyDelimiter,
496        got: AnyDelimiter,
497    },
498    ExistingPreamble(String),
499}
500impl ErrorKind {
501    fn with(self, span: impl Into<Span>) -> Error {
502        Error(self, span.into())
503    }
504}
505impl Display for ErrorKind {
506    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
507        match self {
508            ErrorKind::ExpectedFound(expected, found) => {
509                write!(f, "expected {expected}, found {found}")
510            }
511            ErrorKind::StackEmpty(expected) => write!(f, "stack empty; {expected}"),
512            ErrorKind::StrayControl(control) => {
513                write!(f, "expected a rule, got a control ({control:?})")
514            }
515            ErrorKind::EOF => write!(f, "unexpected end of file"),
516            ErrorKind::InvalidCloser { expected, got } => write!(
517                f,
518                "expected {} to match {}, got {}",
519                expected.closer(),
520                expected.opener(),
521                got.closer()
522            ),
523            ErrorKind::ExistingPreamble(preamble) => {
524                write!(f, "preamble #{preamble} already exists")
525            }
526        }
527    }
528}
529impl core::error::Error for ErrorKind {}
530
531/// The error type.
532#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
533pub struct Error(ErrorKind, Span);
534impl Error {
535    pub fn span(&self) -> Span {
536        self.1
537    }
538    fn eof(end_span: Span) -> Self {
539        ErrorKind::EOF.with(end_span)
540    }
541}
542impl Display for Error {
543    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
544        write!(f, "{} (at {})", self.0, self.1)
545    }
546}
547impl core::error::Error for Error {}
548pub type Result<T> = core::result::Result<T, Error>;
549
550/// Skips over as many whitespace characters as possible.
551///
552/// Used between many parsing functions.
553pub fn skip_ws(input: &mut parse_input!()) {
554    while let Some(Spanned(_, ch)) = input.clone().next() {
555        if !ch.is_whitespace() {
556            return;
557        }
558        input.next();
559    }
560}
561
562/// Attempts to parse an identifier. If already at EOF, `Ok(None)` is returned.
563pub fn expect_identifier(input: &mut parse_input!()) -> Result<Option<Spanned<String>>> {
564    let (mut span, ch) = match input.next() {
565        Some(Spanned(span, ch @ pat_ident_start!())) => (span, ch),
566        Some(Spanned(span, other)) => {
567            return Err(ErrorKind::ExpectedFound(
568                "identifier".to_owned(),
569                format!("character `{other}`"),
570            )
571            .with(span));
572        }
573        None => return Ok(None),
574    };
575    let mut output = String::from(ch);
576    consume_identifier_rest(input, &mut span, &mut output);
577    Ok(Some(Spanned(span, output)))
578}
579
580/// Similar to `expect_identifier`, but errors on EOF.
581pub fn identifier_or_eof(input: &mut parse_input!()) -> Result<Spanned<String>> {
582    expect_identifier(input)?.ok_or(input.eof())
583}
584
585/// After parsing the first character of an identifier, this can be used to parse the rest of the characters.
586pub fn consume_identifier_rest(
587    input: &mut parse_input!(),
588    into_span: &mut Span,
589    into: &mut String,
590) {
591    while let Some(Spanned(span, ch @ pat_ident_body!())) = input.clone().next() {
592        input.next();
593        *into_span += span;
594        into.push(ch);
595    }
596}
597
598/// Expects some literal text content to appear in the character stream.
599pub fn expect_exactly(input: &mut parse_input!(), string: &str) -> Result<Span> {
600    let mut collected = String::new();
601    let mut acc_span = input.current_span();
602    for ch in string.chars() {
603        let next = input.next();
604        if let Some(Spanned(span, test_ch)) = next {
605            acc_span += span;
606            collected.push(test_ch);
607            if test_ch != ch {
608                return Err(ErrorKind::ExpectedFound(
609                    format!("`{string}`"),
610                    format!("`{collected}`"),
611                )
612                .with(span));
613            }
614        } else {
615            return Err(input.eof());
616        }
617    }
618    Ok(acc_span)
619}
620
621/// Parses a list of record or named choice fields.
622pub fn parse_fields(
623    input: &mut parse_input!(),
624    terminator: char,
625) -> Result<Spanned<Vec<(Spanned<Option<String>>, Rule)>>> {
626    let mut record_fields = vec![];
627    let mut total_span = input.current_span();
628    loop {
629        skip_ws(input);
630        if let Some(Spanned(span, ch)) = input.clone().next()
631            && ch == terminator
632        {
633            total_span += span;
634            input.next();
635            break;
636        }
637
638        let identifier = if let Some(Spanned(span, '!')) = input.clone().next() {
639            input.next();
640            Spanned(span, None)
641        } else {
642            let identifier = expect_identifier(input)?.ok_or(input.eof())?;
643            Spanned(identifier.0, Some(identifier.1))
644        };
645        total_span += identifier.0;
646
647        skip_ws(input);
648        total_span += expect_exactly(input, ":")?;
649
650        skip_ws(input);
651        let start = input.current_span();
652        let mut rule_stack = PegStack(vec![]);
653        while let RuleStatus::Continue = next_rule(input, &mut rule_stack)? {
654            skip_ws(input);
655        }
656
657        let mut rules = rule_stack
658            .0
659            .into_iter()
660            .map(|rule| rule.try_as_rule())
661            .collect::<Result<Vec<_>>>()?;
662        let rule_span = rules.iter().map(|rule| rule.1).fold(start, |a, b| a + b);
663        total_span += rule_span;
664
665        record_fields.push((
666            identifier,
667            if rules.len() == 1 {
668                rules.pop().unwrap()
669            } else {
670                Rule::sequence(rules, true, rule_span)
671            },
672        ));
673    }
674    Ok(Spanned(total_span, record_fields))
675}
676
677/// Whether or not rule parsing should continue.
678pub enum RuleStatus {
679    Continue,
680    End,
681}
682
683/// Parses the next rule, modifying the stack accordingly. Returns whether or not rule parsing should continue.
684pub fn next_rule(input: &mut parse_input!(), stack: &mut PegStack) -> Result<RuleStatus> {
685    let Spanned(mut peg_span, ch) = input.next().ok_or(input.eof())?;
686    match ch {
687        '"' => {
688            let mut keyword = String::new();
689            loop {
690                let Spanned(span, ch) = input.next().ok_or(input.eof())?;
691                peg_span += span;
692                if ch == '"' {
693                    break;
694                }
695                keyword.push(ch);
696            }
697            stack.add_rule(Rule::keyword(keyword, peg_span));
698        }
699
700        '\'' => {
701            let mut punct = String::new();
702            loop {
703                let Spanned(span, ch) = input.next().ok_or(input.eof())?;
704                if ch == '\'' {
705                    peg_span += span;
706                    break;
707                }
708                if !matches!(ch, pat_punct!()) {
709                    return Err(ErrorKind::ExpectedFound(
710                        "punctuation".to_owned(),
711                        format!("character `{ch}`"),
712                    )
713                    .with(span));
714                }
715                peg_span += span;
716                punct.push(ch);
717            }
718            stack.add_rule(Rule::punctuation(punct, peg_span));
719        }
720
721        '*' => {
722            let separator = stack.raw_pop_rule(peg_span)?;
723            let element = stack.take_rule(peg_span)?;
724            let other_span = element.1 + separator.1;
725            stack.add_rule(Rule::repeat(
726                element,
727                separator,
728                AtLeast::Zero,
729                peg_span + other_span,
730            ));
731        }
732        '+' => {
733            let separator = stack.raw_pop_rule(peg_span)?;
734            let element = stack.take_rule(peg_span)?;
735            let other_span = element.1 + separator.1;
736            stack.add_rule(Rule::repeat(
737                element,
738                separator,
739                AtLeast::One,
740                peg_span + other_span,
741            ));
742        }
743        '~' => {
744            let Rule(kind, span) = stack.take_rule(peg_span)?;
745            let RuleKind::Repeat(mut repeat) = kind else {
746                return Err(ErrorKind::ExpectedFound(
747                    "repetition rule".to_owned(),
748                    "other rule".to_owned(),
749                )
750                .with(peg_span));
751            };
752            repeat.allow_trailing = true;
753            stack.add_rule_kind(RuleKind::Repeat(repeat), peg_span + span);
754        }
755
756        '?' => {
757            let element = stack.take_rule(peg_span)?;
758            let element_span = element.1;
759            stack.add_rule(Rule::optional(element, peg_span + element_span));
760        }
761        '^' => {
762            let element = stack.take_rule(peg_span)?;
763            let element_span = element.1;
764            stack.add_rule(Rule::boxed(element, peg_span + element_span));
765        }
766
767        '.' => stack.add_rule(Rule::sequence(vec![], false, peg_span)),
768        '<' => stack.0.push(Peg::sequence_start(peg_span)),
769        '(' => stack
770            .0
771            .push(Peg::group_start(GroupDelimiter::Parenthesis, peg_span)),
772        '[' => stack
773            .0
774            .push(Peg::group_start(GroupDelimiter::Bracket, peg_span)),
775        '{' => stack
776            .0
777            .push(Peg::group_start(GroupDelimiter::Brace, peg_span)),
778        '>' => {
779            let mut sequence = vec![];
780            let mut total_span = peg_span;
781            loop {
782                let peg = stack.0.pop().ok_or(
783                    ErrorKind::StackEmpty("missing start control".to_owned()).with(peg_span),
784                )?;
785                total_span += peg.span();
786                match peg {
787                    Peg::Rule(rule) => {
788                        total_span += rule.1;
789                        sequence.push(rule);
790                    }
791                    Peg::Control(Control(ControlKind::GroupStart(delimiter), span)) => {
792                        return Err(ErrorKind::ExpectedFound(
793                            "sequence start control (`<`)".into(),
794                            format!("group start control ({})", delimiter.opener()),
795                        )
796                        .with(span));
797                    }
798                    Peg::Control(Control(ControlKind::SequenceStart, span)) => {
799                        sequence.reverse();
800                        stack.add_rule(Rule::sequence(sequence, false, total_span + span));
801                        break;
802                    }
803                }
804            }
805        }
806        ch @ (')' | ']' | '}') => {
807            let closer = match ch {
808                ')' => GroupDelimiter::Parenthesis,
809                ']' => GroupDelimiter::Bracket,
810                '}' => GroupDelimiter::Brace,
811                _ => unreachable!(),
812            };
813            let mut sequence = vec![];
814            let mut inner_span = Span::default();
815            loop {
816                let peg = stack.0.pop().ok_or(
817                    ErrorKind::StackEmpty("missing start control".to_owned()).with(peg_span),
818                )?;
819                peg_span += peg.span();
820                match peg {
821                    Peg::Rule(rule) => {
822                        inner_span += rule.1;
823                        sequence.push(rule);
824                    }
825                    Peg::Control(Control(ControlKind::GroupStart(opener), span)) => {
826                        peg_span += span + inner_span;
827                        if opener == closer {
828                            sequence.reverse();
829                            stack.add_rule(Rule::group(
830                                opener,
831                                if sequence.len() == 1 {
832                                    sequence.pop().unwrap()
833                                } else {
834                                    Rule::sequence(sequence, true, inner_span)
835                                },
836                                peg_span,
837                            ));
838                            break;
839                        } else {
840                            return Err(ErrorKind::InvalidCloser {
841                                expected: opener.into(),
842                                got: closer.into(),
843                            }
844                            .with(span));
845                        }
846                    }
847                    Peg::Control(Control(ControlKind::SequenceStart, span)) => {
848                        return Err(ErrorKind::InvalidCloser {
849                            expected: AnyDelimiter::AngleBrackets,
850                            got: closer.into(),
851                        }
852                        .with(span));
853                    }
854                }
855            }
856        }
857
858        '|' => {
859            let after = parse_single_grammar(input)?;
860            let first = stack.raw_pop_rule(peg_span)?;
861            let rule = match first {
862                Rule(RuleKind::Choice(mut choices), span) => {
863                    choices.push(after);
864                    Rule::choice(choices, peg_span + span)
865                }
866                other => {
867                    let mut first_variant_span = other.1;
868                    let mut first_variant_sequence = vec![other];
869                    while let Some(peg) = stack.0.pop() {
870                        match peg {
871                            Peg::Control(control) => {
872                                stack.0.push(Peg::Control(control));
873                                peg_span += control.1;
874                                break;
875                            }
876                            Peg::Rule(rule) => {
877                                first_variant_span += rule.1;
878                                first_variant_sequence.push(rule);
879                            }
880                        }
881                    }
882                    first_variant_sequence.reverse();
883                    let first_variant_rule = if first_variant_sequence.len() == 1 {
884                        first_variant_sequence.pop().unwrap()
885                    } else {
886                        Rule::sequence(first_variant_sequence, true, first_variant_span)
887                    };
888                    let after_span = after.1;
889                    Rule::choice(
890                        vec![first_variant_rule, after],
891                        first_variant_span + peg_span + after_span,
892                    )
893                }
894            };
895            stack.add_rule(rule);
896        }
897
898        '@' => {
899            let Spanned(span, builtin) = expect_identifier(input)?.ok_or(input.eof())?;
900            stack.add_rule(Rule::builtin(builtin, span));
901        }
902
903        '&' => match input.next().ok_or(input.eof())? {
904            Spanned(span, '{') => {
905                peg_span += span;
906                let Spanned(span, fields) = parse_fields(input, '}')?;
907                peg_span += span;
908                stack.add_rule(Rule::record(fields, peg_span));
909            }
910            Spanned(span, '[') => {
911                peg_span += span;
912                let Spanned(span, fields) = parse_fields(input, ']')?;
913                let fields = fields
914                    .into_iter()
915                    // TODO ts ugly ah
916                    .map(|(name, rule)| {
917                        Ok((
918                            Spanned(
919                                name.0,
920                                name.1.ok_or_else(|| {
921                                    ErrorKind::ExpectedFound(
922                                        "name".to_owned(),
923                                        "`!` (named choices cannot have anonymous fields)"
924                                            .to_owned(),
925                                    )
926                                    .with(name.0)
927                                })?,
928                            ),
929                            rule,
930                        ))
931                    })
932                    .collect::<Result<Vec<_>>>()?;
933                peg_span += span;
934                stack.add_rule(Rule::named_choice(fields, peg_span));
935            }
936            other => {
937                return Err(ErrorKind::ExpectedFound(
938                    "one of `{` or `[`".to_owned(),
939                    format!("`{}`", other.1),
940                )
941                .with(other.0));
942            }
943        },
944
945        ';' => return Ok(RuleStatus::End),
946
947        ch @ pat_ident_start!() => {
948            let mut rule_name = String::from(ch);
949            consume_identifier_rest(input, &mut peg_span, &mut rule_name);
950            stack.add_rule(Rule::other(rule_name, peg_span));
951        }
952
953        other => {
954            return Err(ErrorKind::ExpectedFound(
955                "rule".to_owned(),
956                format!("character `{other}`"),
957            )
958            .with(peg_span));
959        }
960    }
961    Ok(RuleStatus::Continue)
962}
963
964/// Parses a full grammar;
965/// creates a new stack and calls `next_rule` repeatedly until no control grammars are on the stack and the stack is not empty.
966pub fn parse_single_grammar(input: &mut parse_input!()) -> Result<Rule> {
967    let mut stack = PegStack(vec![]);
968    while stack.0.is_empty()
969        || stack
970            .0
971            .iter()
972            .any(|peg: &Peg| matches!(&peg, Peg::Control(_)))
973    {
974        skip_ws(input);
975        if let RuleStatus::End = next_rule(input, &mut stack)? {
976            return Err(input.eof());
977        }
978    }
979    Ok(stack.0.pop().unwrap().try_as_rule()?)
980}
981
982/// A `#keywords` preamble.
983#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
984pub struct Keywords {
985    pub soft: Vec<Spanned<String>>,
986    pub hard: Vec<Spanned<String>>,
987}
988impl Display for Keywords {
989    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
990        if let [most @ .., last] = &self.soft[..] {
991            write!(f, "#keywords soft: ")?;
992            for Spanned(_, word) in most {
993                write!(f, "{word} ")?;
994            }
995            writeln!(f, "{};", last.1)?;
996        }
997        if let [most @ .., last] = &self.hard[..] {
998            write!(f, "#keywords hard: ")?;
999            for Spanned(_, word) in most {
1000                write!(f, "{word} ")?;
1001            }
1002            writeln!(f, "{};", last.1)?;
1003        }
1004        Ok(())
1005    }
1006}
1007
1008/// The set of preambles.
1009#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
1010pub struct Preambles {
1011    pub keywords: Keywords,
1012}
1013impl Display for Preambles {
1014    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1015        writeln!(f, "{}", self.keywords)
1016    }
1017}
1018
1019/// A TECTA PEG module.
1020#[derive(Clone, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
1021pub struct TectaPegModule {
1022    pub preambles: Preambles,
1023    pub rules: BTreeMap<String, Vec<Rule>>,
1024}
1025impl Display for TectaPegModule {
1026    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1027        write!(f, "{}", self.preambles)?;
1028        for (rule_name, rules) in &self.rules {
1029            write!(f, "{rule_name} =")?;
1030            for rule in rules {
1031                write!(f, " {rule}")?;
1032            }
1033            writeln!(f, ";")?;
1034        }
1035        Ok(())
1036    }
1037}
1038
1039/// Parses a list of identifiers.
1040pub fn parse_basic_identifier_list(input: &mut parse_input!()) -> Result<Vec<Spanned<String>>> {
1041    let mut identifiers = vec![];
1042    loop {
1043        skip_ws(input);
1044        if let Some(Spanned(_, ';')) = input.clone().next() {
1045            return Ok(identifiers);
1046        }
1047        identifiers.push(identifier_or_eof(input)?);
1048    }
1049}
1050
1051/// Parses a TECTA PEG module; a set of preambles and rule definitions.
1052pub fn parse_module_inner(input: &mut parse_input!()) -> Result<TectaPegModule> {
1053    let mut module = TectaPegModule::default();
1054    loop {
1055        skip_ws(input);
1056        // TODO: split preamble into function
1057        if let Some(Spanned(_, '#')) = input.clone().next() {
1058            input.next();
1059            let Spanned(span, name) = identifier_or_eof(input)?;
1060            match &name[..] {
1061                "keywords" => {
1062                    skip_ws(input);
1063                    let Spanned(span, name) = identifier_or_eof(input)?;
1064                    let is_hard = match &name[..] {
1065                        "hard" => true,
1066                        "soft" => false,
1067                        other => {
1068                            return Err(ErrorKind::ExpectedFound(
1069                                "keyword hardness".into(),
1070                                format!("`{other}`"),
1071                            )
1072                            .with(span));
1073                        }
1074                    };
1075
1076                    let colon_span = expect_exactly(input, ":")?;
1077                    let specified_keywords = parse_basic_identifier_list(input)?;
1078
1079                    if specified_keywords.is_empty() {
1080                        return Err(ErrorKind::ExpectedFound(
1081                            "non-empty keyword list".into(),
1082                            "empty list".into(),
1083                        )
1084                        .with(colon_span));
1085                    }
1086
1087                    let target_keyword_set = if is_hard {
1088                        &mut module.preambles.keywords.hard
1089                    } else {
1090                        &mut module.preambles.keywords.soft
1091                    };
1092                    if !target_keyword_set.is_empty() {
1093                        return Err(ErrorKind::ExistingPreamble(format!(
1094                            "keywords {}",
1095                            if is_hard { "hard" } else { "soft" }
1096                        ))
1097                        .with(colon_span));
1098                    }
1099
1100                    *target_keyword_set = specified_keywords;
1101                }
1102                other => {
1103                    return Err(
1104                        ErrorKind::ExpectedFound("preamble".into(), format!("`{other}`"))
1105                            .with(span),
1106                    );
1107                }
1108            }
1109            expect_exactly(input, ";")?;
1110        } else if let Some(Spanned(_, rule_name)) = expect_identifier(input)? {
1111            skip_ws(input);
1112            let _eq_span = expect_exactly(input, "=")?;
1113            skip_ws(input);
1114
1115            let mut peg_stack = PegStack(vec![]);
1116            while let RuleStatus::Continue = next_rule(input, &mut peg_stack)? {
1117                skip_ws(input);
1118            }
1119            skip_ws(input);
1120
1121            let sequence = peg_stack
1122                .0
1123                .into_iter()
1124                .map(Peg::try_as_rule)
1125                .collect::<Result<Vec<_>>>()?;
1126            module.rules.insert(rule_name, sequence);
1127        } else {
1128            break;
1129        }
1130    }
1131    Ok(module)
1132}
1133
1134/// Parses a TECTA PEG module from a string. See [`parse_module`].
1135pub fn parse_module(str: &str) -> Result<TectaPegModule> {
1136    let end_span = match str.lines().enumerate().last() {
1137        Some((index, line)) => Span {
1138            start_line: index + 1,
1139            end_line: index + 1,
1140            start_column: line.len(),
1141            end_column: line.len(),
1142        },
1143        None => Span::default(),
1144    };
1145    parse_module_inner(&mut ParseInput {
1146        chars: SpanningChars::new(str.chars()),
1147        end_span,
1148    })
1149}