Skip to main content

elenchus_parser/
grammar.rs

1//! The nom grammar and the recovering top-level driver.
2//!
3//! The per-statement parsers (`atom`, `literal`, `stmt_*`, `impl_body`,
4//! `list_body`) are ordinary `nom` combinators over a located [`Span`]; once a
5//! statement commits on its leading keyword, failures are [`promote`]d to a
6//! `Failure` carrying a precise message ([`Problem`]). The driver [`parse`] does
7//! not stop at the first failure: it records a [`Diagnostic`], resynchronises to
8//! the next top-level keyword line, and keeps going, so one run reports *every*
9//! syntax error.
10
11use alloc::string::{String, ToString};
12use alloc::vec;
13use alloc::vec::Vec;
14
15use nom::{
16    IResult, Parser,
17    branch::alt,
18    bytes::complete::{tag, take_while},
19    character::complete::{char, line_ending, satisfy, space0, space1},
20    combinator::{eof, opt, recognize, value},
21    multi::many0,
22    sequence::{delimited, preceded, terminated},
23};
24
25use crate::ast::{Atom, Body, Conn, ListOp, Literal, Located, Program, Span, Statement};
26use crate::diag::{Diagnostic, Diagnostics};
27use crate::keywords::{is_reserved, is_top_level, keyword_in, kw};
28
29// --- Parser primitives -----------------------------------------------------
30
31/// Internal nom error carrying a human message and the precise failure span.
32/// Recoverable failures are `nom::Err::Error`; once a statement has committed
33/// (its leading keyword is recognized), failures are promoted to
34/// `nom::Err::Failure` so the error points at the *real* problem with a specific
35/// message instead of backtracking to a generic "expected a statement".
36#[derive(Debug, Clone)]
37struct Problem<'a> {
38    input: Span<'a>,
39    message: String,
40}
41
42impl<'a> nom::error::ParseError<Span<'a>> for Problem<'a> {
43    fn from_error_kind(input: Span<'a>, _: nom::error::ErrorKind) -> Self {
44        Problem {
45            input,
46            message: String::from("unexpected token"),
47        }
48    }
49    fn append(_: Span<'a>, _: nom::error::ErrorKind, other: Self) -> Self {
50        other
51    }
52}
53
54/// nom result specialized to our [`Problem`] error and located `Span` input.
55type PResult<'a, T> = IResult<Span<'a>, T, Problem<'a>>;
56
57/// Turn a recoverable `Error` into a committed `Failure` at `at` with `msg`.
58/// Already-`Failure` (a deeper, more specific message) and `Ok` pass through.
59fn promote<'a, T>(r: PResult<'a, T>, at: Span<'a>, msg: &str) -> PResult<'a, T> {
60    match r {
61        Err(nom::Err::Error(_)) => Err(nom::Err::Failure(Problem {
62            input: at,
63            message: String::from(msg),
64        })),
65        other => other,
66    }
67}
68
69/// A plain recoverable `Error` at `input` (lets an enclosing `alt` backtrack).
70fn perr<'a, T>(input: Span<'a>) -> PResult<'a, T> {
71    Err(nom::Err::Error(Problem {
72        input,
73        message: String::from("unexpected token"),
74    }))
75}
76
77/// Characters allowed *after* the first in an identifier. Letters and digits of
78/// *any* script are accepted (Unicode `is_alphanumeric`), so `условие` or `名前`
79/// are valid; `_` joins multi-word names. `.` is **not** an identifier character:
80/// it is the domain separator (`physics.engine`), so use `_` for compound
81/// subjects (`Creature_A`). Punctuation and other symbols are rejected.
82fn is_ident_char(c: char) -> bool {
83    c.is_alphanumeric() || c == '_'
84}
85
86/// A bare identifier (does not reject reserved words). The first character must
87/// be a letter of any script (`is_alphabetic`) — never a digit, `_`, `.`, or
88/// punctuation — so identifiers stay distinct from numbers and operators.
89fn raw_identifier<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
90    recognize((satisfy(|c| c.is_alphabetic()), take_while(is_ident_char))).parse(input)
91}
92
93/// An identifier that is not a reserved keyword.
94fn identifier<'a>(input: Span<'a>) -> PResult<'a, Located<'a, &'a str>> {
95    let start = input;
96    let (rest, sp) = raw_identifier(input)?;
97    if is_reserved(sp.fragment()) {
98        return perr(start);
99    }
100    Ok((
101        rest,
102        Located {
103            data: *sp.fragment(),
104            span: start,
105        },
106    ))
107}
108
109/// A line comment `// ... ` up to (but not including) the line ending.
110fn comment<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
111    recognize((tag("//"), take_while(|c| c != '\n' && c != '\r'))).parse(input)
112}
113
114/// End of a statement line: trailing spaces, an optional trailing comment,
115/// then a line ending or EOF.
116fn eol<'a>(input: Span<'a>) -> PResult<'a, ()> {
117    value((), (space0, opt(comment), alt((line_ending, eof)))).parse(input)
118}
119
120/// A blank line or a full-line comment (must consume a line ending to progress).
121fn noise_line<'a>(input: Span<'a>) -> PResult<'a, ()> {
122    value((), (space0, opt(comment), line_ending)).parse(input)
123}
124
125/// Skip any number of blank / comment lines between statements.
126fn skip_noise<'a>(input: Span<'a>) -> PResult<'a, ()> {
127    value((), many0(noise_line)).parse(input)
128}
129
130// --- Atoms & literals ------------------------------------------------------
131
132/// `[<domain>.]<subject> <predicate> [<object>]` — an atom, optionally qualified
133/// by a `domain.` prefix on the subject, then two or three space-separated
134/// identifiers. The domain is recognised only when an identifier is immediately
135/// followed by `.` (no space), so a bare `engine has_fuel` keeps `engine` as the
136/// subject.
137fn atom<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
138    let start = input;
139    let (input, domain) = opt(terminated(identifier, char('.'))).parse(input)?;
140    let (input, subject) = identifier(input)?;
141    let (input, _) = space1(input)?;
142    let (input, predicate) = identifier(input)?;
143    let (input, object) = opt(preceded(space1, identifier)).parse(input)?;
144    Ok((
145        input,
146        Located {
147            data: Atom {
148                domain: domain.map(|d| d.data),
149                subject: subject.data,
150                predicate: predicate.data,
151                object: object.map(|o| o.data),
152            },
153            span: start,
154        },
155    ))
156}
157
158/// An optionally `NOT`-prefixed [`atom`] — a literal inside a `WHEN`/`THEN` body.
159fn literal<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Literal<'a>>> {
160    let start = input;
161    let (input, neg) = opt(terminated(tag(kw::NOT), space1)).parse(input)?;
162    let (input, a) = atom(input)?;
163    Ok((
164        input,
165        Located {
166            data: Literal {
167                negated: neg.is_some(),
168                atom: a.data,
169            },
170            span: start,
171        },
172    ))
173}
174
175/// An atom on its own (possibly indented) line: used inside list bodies.
176fn atom_line<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
177    let (input, _) = space0(input)?;
178    let (input, a) = atom(input)?;
179    let (input, _) = eol(input)?;
180    Ok((input, a))
181}
182
183// --- Bodies ----------------------------------------------------------------
184
185/// One of the list-constraint keywords (`EXCLUSIVE`/`FORBIDS`/`ONEOF`/`ATLEAST`).
186fn list_op<'a>(input: Span<'a>) -> PResult<'a, ListOp> {
187    alt((
188        value(ListOp::Exclusive, tag(kw::EXCLUSIVE)),
189        value(ListOp::Forbids, tag(kw::FORBIDS)),
190        value(ListOp::OneOf, tag(kw::ONEOF)),
191        value(ListOp::AtLeast, tag(kw::ATLEAST)),
192    ))
193    .parse(input)
194}
195
196/// A list-constraint body: a list operator on its own line, then one atom per
197/// line (at least two).
198///
199/// Commit strategy: the leading `list_op` failing stays a recoverable `Error`
200/// so the `PREMISE` `alt` can fall through and try [`impl_body`] instead. *Once*
201/// the operator matched we are committed, so every subsequent failure is
202/// [`promote`]d to a `Failure` with a specific message — no backtracking to a
203/// generic "expected a statement".
204fn list_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
205    let (input, _) = space0(input)?;
206    // list_op failing stays Error so the PREMISE alt can try impl_body.
207    let (input, op) = list_op(input)?;
208    // Past here we are committed to a list body.
209    let (input, _) = promote(
210        eol(input),
211        input,
212        "expected a newline after the list operator",
213    )?;
214    let at = input;
215    let (input, first) = promote(
216        atom_line(input),
217        at,
218        "a list premise needs at least two atoms",
219    )?;
220    let at = input;
221    let (input, second) = promote(
222        atom_line(input),
223        at,
224        "a list premise needs at least two atoms",
225    )?;
226    let (input, rest) = many0(atom_line).parse(input)?;
227
228    let mut atoms = vec![first, second];
229    atoms.extend(rest);
230    Ok((input, Body::List { op, atoms }))
231}
232
233/// A continuation `AND <literal>` / `OR <literal>` line inside a `WHEN`/`THEN`
234/// block. Returns `(Conn, literal)`. A line that is neither `AND` nor `OR` yields
235/// a recoverable `Error` so `many0` stops cleanly (e.g. at `THEN`/EOF).
236fn cont_line<'a>(input: Span<'a>) -> PResult<'a, (Conn, Located<'a, Literal<'a>>)> {
237    let (input, _) = space0(input)?;
238    // Not an AND/OR line → Error so many0 stops cleanly.
239    let (input, conn) =
240        alt((value(Conn::And, tag(kw::AND)), value(Conn::Or, tag(kw::OR)))).parse(input)?;
241    let (input, _) = space1(input)?;
242    let at = input;
243    let (input, lit) = promote(
244        literal(input),
245        at,
246        "AND/OR expects a literal: [NOT] <Subject> <predicate> [<object>]",
247    )?;
248    let (input, _) = promote(
249        eol(input),
250        input,
251        "unexpected text after the AND/OR literal",
252    )?;
253    Ok((input, (conn, lit)))
254}
255
256/// Reduce a group's continuation lines to a single [`Conn`], rejecting a mix of
257/// `AND` and `OR` in one group (point the error at the first line that switches).
258fn group_conn<'a>(conts: &[(Conn, Located<'a, Literal<'a>>)]) -> Result<Conn, Span<'a>> {
259    let mut seen: Option<Conn> = None;
260    for (conn, lit) in conts {
261        match seen {
262            None => seen = Some(*conn),
263            Some(s) if s != *conn => return Err(lit.span),
264            _ => {}
265        }
266    }
267    Ok(seen.unwrap_or(Conn::And))
268}
269
270/// Fail with a committed message at `at` (for in-body semantic checks).
271fn fail_at<'a, T>(at: Span<'a>, msg: &str) -> PResult<'a, T> {
272    Err(nom::Err::Failure(Problem {
273        input: at,
274        message: String::from(msg),
275    }))
276}
277
278/// An implication body: `WHEN <lit> [AND <lit>]* THEN <lit> [AND <lit>]*`.
279///
280/// Like [`list_body`], a missing leading `WHEN` stays a recoverable `Error` (so
281/// the `PREMISE` `alt` can try a list body); after `WHEN` matches we are committed
282/// and use [`promote`] for precise errors. Antecedent and consequent each start
283/// with one mandatory literal followed by zero or more `AND` lines.
284fn impl_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
285    let (input, _) = space0(input)?;
286    // No WHEN → Error so the PREMISE alt can fall through to a list body.
287    let (input, _) = (tag(kw::WHEN), space1).parse(input)?;
288    // Committed to an implication body now.
289    let at = input;
290    let (input, when) = promote(
291        literal(input),
292        at,
293        "WHEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
294    )?;
295    let (input, _) = promote(eol(input), input, "unexpected text after the WHEN literal")?;
296    let (input, ante_rest) = many0(cont_line).parse(input)?;
297    let ante_conn = match group_conn(&ante_rest) {
298        Ok(c) => c,
299        Err(span) => {
300            return fail_at(
301                span,
302                "don't mix AND and OR in one WHEN group — split it into separate premises",
303            );
304        }
305    };
306
307    let (input, _) = space0(input)?;
308    let at = input;
309    let (input, _) = promote(
310        tag(kw::THEN).parse(input),
311        at,
312        "expected THEN to complete the WHEN ... THEN implication",
313    )?;
314    let at = input;
315    let (input, then) = promote(
316        preceded(space1, literal).parse(input),
317        at,
318        "THEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
319    )?;
320    let (input, _) = promote(eol(input), input, "unexpected text after the THEN literal")?;
321    let (input, cons_rest) = many0(cont_line).parse(input)?;
322    let cons_conn = match group_conn(&cons_rest) {
323        Ok(c) => c,
324        Err(span) => {
325            return fail_at(
326                span,
327                "don't mix AND and OR in one THEN group — split it into separate premises",
328            );
329        }
330    };
331
332    let mut antecedent = vec![when];
333    antecedent.extend(ante_rest.into_iter().map(|(_, l)| l));
334    let mut consequent = vec![then];
335    consequent.extend(cons_rest.into_iter().map(|(_, l)| l));
336    Ok((
337        input,
338        Body::Impl {
339            antecedent,
340            ante_conn,
341            consequent,
342            cons_conn,
343        },
344    ))
345}
346
347// --- Statements ------------------------------------------------------------
348
349/// `IMPORT "<path>" [AS <alias>]` — a quoted path, optionally bound to a local
350/// domain alias, on one line.
351fn stmt_import<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
352    let (input, _) = (tag(kw::IMPORT), space1).parse(input)?;
353    let start = input;
354    let (input, path) = promote(
355        delimited(char('"'), take_while(|c| c != '"' && c != '\n'), char('"')).parse(input),
356        start,
357        "IMPORT expects a quoted path, e.g. IMPORT \"physics.vrf\"",
358    )?;
359    // Optional `AS <alias>`: a local name for the imported domain.
360    let (input, alias) = opt(preceded((space1, tag(kw::AS), space1), identifier)).parse(input)?;
361    let (input, _) = promote(
362        eol(input),
363        input,
364        "unexpected text after the IMPORT path (did you mean AS <alias>?)",
365    )?;
366    Ok((
367        input,
368        Statement::Import {
369            path: Located {
370                data: *path.fragment(),
371                span: start,
372            },
373            alias,
374        },
375    ))
376}
377
378/// `DOMAIN <name>` — declare this file's domain on one line.
379fn stmt_domain<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
380    let (input, _) = (tag(kw::DOMAIN), space1).parse(input)?;
381    let at = input;
382    let (input, name) = promote(
383        identifier(input),
384        at,
385        "DOMAIN expects a name (a lowercase identifier), e.g. DOMAIN physics",
386    )?;
387    let (input, _) = promote(eol(input), input, "unexpected text after the DOMAIN name")?;
388    Ok((input, Statement::Domain(name)))
389}
390
391/// `FACT <atom>` — a TRUE assertion.
392fn stmt_fact<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
393    let (input, _) = (tag(kw::FACT), space1).parse(input)?;
394    let at = input;
395    let (input, a) = promote(
396        atom(input),
397        at,
398        "FACT expects an atom: <Subject> <predicate> [<object>]",
399    )?;
400    let (input, _) = promote(eol(input), input, "unexpected text after the FACT atom")?;
401    Ok((input, Statement::Fact(a)))
402}
403
404/// `ASSUME [NOT] <atom>` — a soft (retractable) assertion. Accepts a leading
405/// `NOT` (like a `WHEN`/`THEN` literal), so `ASSUME NOT x a` is FALSE-by-default.
406fn stmt_assume<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
407    let (input, _) = (tag(kw::ASSUME), space1).parse(input)?;
408    let at = input;
409    let (input, lit) = promote(
410        literal(input),
411        at,
412        "ASSUME expects an atom: [NOT] <Subject> <predicate> [<object>]",
413    )?;
414    let (input, _) = promote(eol(input), input, "unexpected text after the ASSUME atom")?;
415    Ok((input, Statement::Assume(lit)))
416}
417
418/// `NOT <atom>` — a FALSE assertion. Tried last among statements so a body-level
419/// `NOT` literal is never mistaken for a top-level negation.
420fn stmt_negation<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
421    let (input, _) = (tag(kw::NOT), space1).parse(input)?;
422    let at = input;
423    let (input, a) = promote(
424        atom(input),
425        at,
426        "NOT expects an atom: <Subject> <predicate> [<object>]",
427    )?;
428    let (input, _) = promote(eol(input), input, "unexpected text after the NOT atom")?;
429    Ok((input, Statement::Negation(a)))
430}
431
432/// `CHECK [<subject>] [BIDIRECTIONAL]` — both modifiers optional.
433fn stmt_check<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
434    let (input, _) = tag(kw::CHECK).parse(input)?;
435    let (input, subject) = opt(preceded(space1, identifier)).parse(input)?;
436    let (input, bidir) = opt(preceded(space1, tag(kw::BIDIRECTIONAL))).parse(input)?;
437    let (input, _) = eol(input)?;
438    Ok((
439        input,
440        Statement::Check {
441            subject,
442            bidirectional: bidir.is_some(),
443        },
444    ))
445}
446
447/// `PREMISE <name>: <body>` where the body is a list or an implication.
448fn stmt_premise<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
449    let (input, _) = (tag(kw::PREMISE), space1).parse(input)?;
450    // Committed to a premise now.
451    let at = input;
452    let (input, name) = promote(
453        identifier(input),
454        at,
455        "expected a premise name (a lowercase identifier)",
456    )?;
457    let (input, _) = space0(input)?;
458    let (input, _) = promote(
459        char(':').parse(input),
460        input,
461        "expected ':' after the premise name",
462    )?;
463    let (input, _) = promote(eol(input), input, "unexpected text after 'PREMISE <name>:'")?;
464    let at = input;
465    let (input, body) = promote(
466        alt((list_body, impl_body)).parse(input),
467        at,
468        "a premise body must be a list (EXCLUSIVE/FORBIDS/ONEOF/ATLEAST) or WHEN ... THEN",
469    )?;
470    Ok((input, Statement::Premise { name, body }))
471}
472
473/// `RULE <name>: <implication>` — like a premise but the body must be `WHEN … THEN`.
474fn stmt_rule<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
475    let (input, _) = (tag(kw::RULE), space1).parse(input)?;
476    let at = input;
477    let (input, name) = promote(
478        identifier(input),
479        at,
480        "expected a rule name (a lowercase identifier)",
481    )?;
482    let (input, _) = space0(input)?;
483    let (input, _) = promote(
484        char(':').parse(input),
485        input,
486        "expected ':' after the rule name",
487    )?;
488    let (input, _) = promote(eol(input), input, "unexpected text after 'RULE <name>:'")?;
489    let at = input;
490    let (input, body) = promote(impl_body(input), at, "a rule body must be WHEN ... THEN")?;
491    Ok((input, Statement::Rule { name, body }))
492}
493
494/// One top-level statement. Order matters: each branch commits on its keyword,
495/// and `stmt_negation` comes last so a leading `NOT` is only read as a top-level
496/// negation when nothing else matched.
497fn statement<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
498    // Leading indentation is cosmetic everywhere, including on the top-level
499    // keyword line — so a whole program may be written indented (e.g. inside a
500    // host's here-doc) and parse identically.
501    let (input, _) = space0(input)?;
502    alt((
503        stmt_domain,
504        stmt_import,
505        stmt_fact,
506        stmt_assume,
507        stmt_premise,
508        stmt_rule,
509        stmt_check,
510        stmt_negation,
511    ))
512    .parse(input)
513}
514
515// --- Recovering driver -----------------------------------------------------
516
517/// Message for a line that begins with no known top-level keyword.
518const NOT_A_STATEMENT: &str = "expected a statement — a line must start with DOMAIN, FACT, NOT, ASSUME, PREMISE, RULE, CHECK, or IMPORT";
519
520/// Parse a full `.vrf` source into a [`Program`], collecting *every* syntax error.
521///
522/// On the first failing statement the parser does not give up: it records a
523/// [`Diagnostic`], resynchronises to the next top-level keyword line, and
524/// continues. A clean parse returns the [`Program`]; otherwise it returns all
525/// errors as [`Diagnostics`], whose `Display` renders each as a caret block with
526/// the keyword's correct syntax.
527pub fn parse(src: &str) -> Result<Program<'_>, Diagnostics> {
528    let mut input = Span::new(src);
529    let mut statements = Vec::new();
530    let mut errors: Vec<Diagnostic> = Vec::new();
531
532    loop {
533        if let Ok((rest, _)) = skip_noise(input) {
534            input = rest;
535        }
536        // Only a whitespace / trailing-comment tail remains — a clean end, not a
537        // statement (the `r#"..."#` fixtures end with an indented closing line).
538        if at_end(input.fragment()) {
539            break;
540        }
541        match statement(input) {
542            Ok((rest, stmt)) => {
543                statements.push(stmt);
544                input = rest;
545            }
546            // Committed: a keyword matched but the rest is wrong → precise message.
547            Err(nom::Err::Failure(p)) => {
548                errors.push(make_diag(src, p.input, p.message, false));
549                input = resync(p.input);
550            }
551            // Recoverable: this line started no statement keyword at all.
552            Err(nom::Err::Error(p)) => {
553                errors.push(make_diag(src, p.input, String::from(NOT_A_STATEMENT), true));
554                input = resync(p.input);
555            }
556            // `complete` combinators never return Incomplete; stop defensively.
557            Err(nom::Err::Incomplete(_)) => break,
558        }
559    }
560
561    if errors.is_empty() {
562        Ok(Program { statements })
563    } else {
564        Err(Diagnostics { file: None, errors })
565    }
566}
567
568/// Whether only an ignorable tail remains: pure whitespace, or a trailing
569/// `//` comment with no following newline. `skip_noise` has already consumed any
570/// full blank/comment lines, so anything else is a real statement to parse.
571fn at_end(frag: &str) -> bool {
572    let t = frag.trim_start();
573    t.is_empty() || (t.starts_with("//") && !t.contains('\n'))
574}
575
576/// Build one [`Diagnostic`] from the failure span. `general` marks a
577/// not-tied-to-a-keyword error (shows the menu of top-level forms instead of a
578/// single syntax card).
579fn make_diag(src: &str, at: Span<'_>, message: String, general: bool) -> Diagnostic {
580    let line = at.location_line() as usize;
581    let col = at.get_column();
582    let line_text = src.lines().nth(line.saturating_sub(1)).unwrap_or("");
583    // The card follows the *message* (which names the construct in question),
584    // not the line the parser stalled on — a stall often lands on the *next*
585    // line (e.g. "expected THEN …" points at the CHECK after a bodyless WHEN),
586    // whose leading word would be a misleading card.
587    let keyword = if general { None } else { keyword_in(&message) };
588    Diagnostic {
589        line,
590        col,
591        width: caret_width(line_text, col),
592        message,
593        keyword,
594        general,
595        line_text: line_text.to_string(),
596    }
597}
598
599/// Caret length: from `col` to the last non-whitespace character of the line (at
600/// least one), so the underline covers the offending token / trailing text.
601fn caret_width(line_text: &str, col: usize) -> usize {
602    let start = col.saturating_sub(1);
603    let trimmed_len = line_text.trim_end().chars().count();
604    trimmed_len.saturating_sub(start).max(1)
605}
606
607/// Resynchronise after an error: skip the rest of the offending line, then any
608/// lines until one begins (after cosmetic indentation) with a top-level keyword,
609/// or EOF. This keeps a broken PREMISE body from cascading into spurious errors.
610fn resync(at: Span<'_>) -> Span<'_> {
611    let mut input = consume_line(at);
612    loop {
613        if input.fragment().is_empty() || starts_top_level(input) {
614            return input;
615        }
616        input = consume_line(input);
617    }
618}
619
620/// Consume through the next line ending (or to EOF if none remains). The
621/// `complete` combinators never fail here, so the `unwrap_or` is unreachable.
622fn consume_line(input: Span<'_>) -> Span<'_> {
623    let parsed: PResult<'_, Span<'_>> =
624        recognize((take_while(|c| c != '\n' && c != '\r'), opt(line_ending))).parse(input);
625    parsed.map(|(rest, _)| rest).unwrap_or(input)
626}
627
628/// Whether `input` begins (after cosmetic indentation) with a top-level keyword.
629fn starts_top_level(input: Span<'_>) -> bool {
630    let after = match space0::<_, Problem<'_>>(input) {
631        Ok((rest, _)) => rest,
632        Err(_) => return false,
633    };
634    let word: String = after
635        .fragment()
636        .chars()
637        .take_while(|c| c.is_ascii_uppercase())
638        .collect();
639    is_top_level(&word)
640}