Skip to main content

elenchus_parser/
grammar.rs

1//! The nom grammar and the recovering top-level driver.
2//!
3//! The per-statement parsers (`atom`, `literal`, `stmt_*`, `impl_body`,
4//! `list_body`) are ordinary `nom` combinators over a located [`Span`]; once a
5//! statement commits on its leading keyword, failures are [`promote`]d to a
6//! `Failure` carrying a precise message ([`Problem`]). The driver [`parse`] does
7//! not stop at the first failure: it records a [`Diagnostic`], resynchronises to
8//! the next top-level keyword line, and keeps going, so one run reports *every*
9//! syntax error.
10
11use alloc::string::{String, ToString};
12use alloc::vec;
13use alloc::vec::Vec;
14
15use nom::{
16    IResult, Parser,
17    branch::alt,
18    bytes::complete::{tag, take_while},
19    character::complete::{char, line_ending, satisfy, space0, space1},
20    combinator::{eof, opt, recognize, value},
21    multi::many0,
22    sequence::{delimited, preceded, terminated},
23};
24
25use crate::ast::{
26    Atom, Body, CloseKind, Conn, ListOp, Literal, Located, Program, Quant, Span, Statement,
27};
28use crate::diag::{Diagnostic, Diagnostics};
29use crate::keywords::{is_reserved, is_top_level, keyword_in, kw};
30
31// --- Parser primitives -----------------------------------------------------
32
33/// Internal nom error carrying a human message and the precise failure span.
34/// Recoverable failures are `nom::Err::Error`; once a statement has committed
35/// (its leading keyword is recognized), failures are promoted to
36/// `nom::Err::Failure` so the error points at the *real* problem with a specific
37/// message instead of backtracking to a generic "expected a statement".
38#[derive(Debug, Clone)]
39struct Problem<'a> {
40    input: Span<'a>,
41    message: String,
42}
43
44impl<'a> nom::error::ParseError<Span<'a>> for Problem<'a> {
45    fn from_error_kind(input: Span<'a>, _: nom::error::ErrorKind) -> Self {
46        Problem {
47            input,
48            message: String::from("unexpected token"),
49        }
50    }
51    fn append(_: Span<'a>, _: nom::error::ErrorKind, other: Self) -> Self {
52        other
53    }
54}
55
56/// nom result specialized to our [`Problem`] error and located `Span` input.
57type PResult<'a, T> = IResult<Span<'a>, T, Problem<'a>>;
58
59/// Turn a recoverable `Error` into a committed `Failure` at `at` with `msg`.
60/// Already-`Failure` (a deeper, more specific message) and `Ok` pass through.
61fn promote<'a, T>(r: PResult<'a, T>, at: Span<'a>, msg: &str) -> PResult<'a, T> {
62    match r {
63        Err(nom::Err::Error(_)) => Err(nom::Err::Failure(Problem {
64            input: at,
65            message: String::from(msg),
66        })),
67        other => other,
68    }
69}
70
71/// A plain recoverable `Error` at `input` (lets an enclosing `alt` backtrack).
72fn perr<'a, T>(input: Span<'a>) -> PResult<'a, T> {
73    Err(nom::Err::Error(Problem {
74        input,
75        message: String::from("unexpected token"),
76    }))
77}
78
79/// Characters allowed *after* the first in an identifier. Letters and digits of
80/// *any* script are accepted (Unicode `is_alphanumeric`), so `условие` or `名前`
81/// are valid; `_` joins multi-word names. `.` is **not** an identifier character:
82/// it is the domain separator (`physics.engine`), so use `_` for compound
83/// subjects (`Creature_A`). Punctuation and other symbols are rejected.
84fn is_ident_char(c: char) -> bool {
85    c.is_alphanumeric() || c == '_'
86}
87
88/// A bare identifier (does not reject reserved words). The first character must
89/// be a letter of any script (`is_alphabetic`) — never a digit, `_`, `.`, or
90/// punctuation — so identifiers stay distinct from numbers and operators.
91fn raw_identifier<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
92    recognize((satisfy(|c| c.is_alphabetic()), take_while(is_ident_char))).parse(input)
93}
94
95/// An identifier that is not a reserved keyword.
96fn identifier<'a>(input: Span<'a>) -> PResult<'a, Located<'a, &'a str>> {
97    let start = input;
98    let (rest, sp) = raw_identifier(input)?;
99    if is_reserved(sp.fragment()) {
100        return perr(start);
101    }
102    Ok((
103        rest,
104        Located {
105            data: *sp.fragment(),
106            span: start,
107        },
108    ))
109}
110
111/// A line comment `// ... ` up to (but not including) the line ending.
112fn comment<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
113    recognize((tag("//"), take_while(|c| c != '\n' && c != '\r'))).parse(input)
114}
115
116/// End of a statement line: trailing spaces, an optional trailing comment,
117/// then a line ending or EOF.
118fn eol<'a>(input: Span<'a>) -> PResult<'a, ()> {
119    value((), (space0, opt(comment), alt((line_ending, eof)))).parse(input)
120}
121
122/// A blank line or a full-line comment (must consume a line ending to progress).
123fn noise_line<'a>(input: Span<'a>) -> PResult<'a, ()> {
124    value((), (space0, opt(comment), line_ending)).parse(input)
125}
126
127/// Skip any number of blank / comment lines between statements.
128fn skip_noise<'a>(input: Span<'a>) -> PResult<'a, ()> {
129    value((), many0(noise_line)).parse(input)
130}
131
132// --- Atoms & literals ------------------------------------------------------
133
134/// `[<domain>.]<subject> [<predicate> [<object>]]` — an atom, optionally qualified
135/// by a `domain.` prefix on the subject, then one to three space-separated
136/// identifiers. The domain is recognised only when an identifier is immediately
137/// followed by `.` (no space), so a bare `engine has_fuel` keeps `engine` as the
138/// subject. A lone identifier (`db_ready`) is a **bare proposition** — predicate
139/// and object both `None`; the compiler requires it to be a declared `VAR`.
140fn atom<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
141    let start = input;
142    let (input, domain) = opt(terminated(identifier, char('.'))).parse(input)?;
143    let (input, subject) = identifier(input)?;
144    let (input, predicate) = opt(preceded(space1, identifier)).parse(input)?;
145    // An object can only follow a predicate; a bare proposition has neither.
146    let (input, object) = match predicate {
147        Some(_) => opt(preceded(space1, identifier)).parse(input)?,
148        None => (input, None),
149    };
150    Ok((
151        input,
152        Located {
153            data: Atom {
154                domain: domain.map(|d| d.data),
155                subject: subject.data,
156                predicate: predicate.map(|p| p.data),
157                object: object.map(|o| o.data),
158            },
159            span: start,
160        },
161    ))
162}
163
164/// An optionally `NOT`-prefixed [`atom`] — a literal inside a `WHEN`/`THEN` body.
165fn literal<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Literal<'a>>> {
166    let start = input;
167    let (input, neg) = opt(terminated(tag(kw::NOT), space1)).parse(input)?;
168    let (input, a) = atom(input)?;
169    Ok((
170        input,
171        Located {
172            data: Literal {
173                negated: neg.is_some(),
174                atom: a.data,
175            },
176            span: start,
177        },
178    ))
179}
180
181/// A boolean value word `true` or `false` — the value of a `VAR` `DEFAULT` (and,
182/// later, a `PROVIDE`). Parsed through the identifier tokenizer and matched
183/// exactly, so `true`/`false` are not reserved and stay usable as ordinary atom
184/// words; only in value position do they read as booleans.
185fn bool_lit<'a>(input: Span<'a>) -> PResult<'a, bool> {
186    let (rest, sp) = raw_identifier(input)?;
187    match *sp.fragment() {
188        "true" => Ok((rest, true)),
189        "false" => Ok((rest, false)),
190        _ => perr(input),
191    }
192}
193
194/// An atom on its own (possibly indented) line: used inside list bodies.
195fn atom_line<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
196    let (input, _) = space0(input)?;
197    let (input, a) = atom(input)?;
198    let (input, _) = eol(input)?;
199    Ok((input, a))
200}
201
202/// A single identifier on its own (possibly indented) line: a `SET` element. A
203/// reserved word or a non-identifier line yields a recoverable `Error` so `many0`
204/// stops cleanly at the next statement.
205fn element_line<'a>(input: Span<'a>) -> PResult<'a, Located<'a, &'a str>> {
206    let (input, _) = space0(input)?;
207    let (input, id) = identifier(input)?;
208    let (input, _) = eol(input)?;
209    Ok((input, id))
210}
211
212// --- Bodies ----------------------------------------------------------------
213
214/// One of the list-constraint keywords (`EXCLUSIVE`/`FORBIDS`/`ONEOF`/`ATLEAST`).
215fn list_op<'a>(input: Span<'a>) -> PResult<'a, ListOp> {
216    alt((
217        value(ListOp::Exclusive, tag(kw::EXCLUSIVE)),
218        value(ListOp::Forbids, tag(kw::FORBIDS)),
219        value(ListOp::OneOf, tag(kw::ONEOF)),
220        value(ListOp::AtLeast, tag(kw::ATLEAST)),
221    ))
222    .parse(input)
223}
224
225/// A list-constraint body: a list operator on its own line, then one atom per
226/// line (at least two).
227///
228/// Commit strategy: the leading `list_op` failing stays a recoverable `Error`
229/// so the `PREMISE` `alt` can fall through and try [`impl_body`] instead. *Once*
230/// the operator matched we are committed, so every subsequent failure is
231/// [`promote`]d to a `Failure` with a specific message — no backtracking to a
232/// generic "expected a statement".
233/// The diagnostic for a list body with fewer than the required two atoms — one
234/// spelling, used for both the missing-first and missing-second slot.
235const LIST_NEEDS_TWO_ATOMS: &str = "a list premise needs at least two atoms";
236
237fn list_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
238    let (input, _) = space0(input)?;
239    // list_op failing stays Error so the PREMISE alt can try impl_body.
240    let (input, op) = list_op(input)?;
241    // Past here we are committed to a list body.
242    let (input, _) = promote(
243        eol(input),
244        input,
245        "expected a newline after the list operator",
246    )?;
247    let at = input;
248    let (input, first) = promote(atom_line(input), at, LIST_NEEDS_TWO_ATOMS)?;
249    let at = input;
250    let (input, second) = promote(atom_line(input), at, LIST_NEEDS_TWO_ATOMS)?;
251    let (input, rest) = many0(atom_line).parse(input)?;
252
253    let mut atoms = vec![first, second];
254    atoms.extend(rest);
255    Ok((input, Body::List { op, atoms }))
256}
257
258/// A continuation `AND <literal>` / `OR <literal>` line inside a `WHEN`/`THEN`
259/// block. Returns `(Conn, literal)`. A line that is neither `AND` nor `OR` yields
260/// a recoverable `Error` so `many0` stops cleanly (e.g. at `THEN`/EOF).
261fn cont_line<'a>(input: Span<'a>) -> PResult<'a, (Conn, Located<'a, Literal<'a>>)> {
262    let (input, _) = space0(input)?;
263    // Not an AND/OR line → Error so many0 stops cleanly.
264    let (input, conn) =
265        alt((value(Conn::And, tag(kw::AND)), value(Conn::Or, tag(kw::OR)))).parse(input)?;
266    let (input, _) = space1(input)?;
267    let at = input;
268    let (input, lit) = promote(
269        literal(input),
270        at,
271        "AND/OR expects a literal: [NOT] <Subject> <predicate> [<object>]",
272    )?;
273    let (input, _) = promote(
274        eol(input),
275        input,
276        "unexpected text after the AND/OR literal",
277    )?;
278    Ok((input, (conn, lit)))
279}
280
281/// Reduce a group's continuation lines to a single [`Conn`], rejecting a mix of
282/// `AND` and `OR` in one group (point the error at the first line that switches).
283fn group_conn<'a>(conts: &[(Conn, Located<'a, Literal<'a>>)]) -> Result<Conn, Span<'a>> {
284    let mut seen: Option<Conn> = None;
285    for (conn, lit) in conts {
286        match seen {
287            None => seen = Some(*conn),
288            Some(s) if s != *conn => return Err(lit.span),
289            _ => {}
290        }
291    }
292    Ok(seen.unwrap_or(Conn::And))
293}
294
295/// Fail with a committed message at `at` (for in-body semantic checks).
296fn fail_at<'a, T>(at: Span<'a>, msg: &str) -> PResult<'a, T> {
297    Err(nom::Err::Failure(Problem {
298        input: at,
299        message: String::from(msg),
300    }))
301}
302
303/// An implication body: `WHEN <lit> [AND <lit>]* THEN <lit> [AND <lit>]*`.
304///
305/// Like [`list_body`], a missing leading `WHEN` stays a recoverable `Error` (so
306/// the `PREMISE` `alt` can try a list body); after `WHEN` matches we are committed
307/// and use [`promote`] for precise errors. Antecedent and consequent each start
308/// with one mandatory literal followed by zero or more `AND` lines.
309fn impl_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
310    let (input, _) = space0(input)?;
311    // No WHEN → Error so the PREMISE alt can fall through to a list body.
312    let (input, _) = (tag(kw::WHEN), space1).parse(input)?;
313    // Committed to an implication body now.
314    let at = input;
315    let (input, when) = promote(
316        literal(input),
317        at,
318        "WHEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
319    )?;
320    let (input, _) = promote(eol(input), input, "unexpected text after the WHEN literal")?;
321    let (input, ante_rest) = many0(cont_line).parse(input)?;
322    let ante_conn = match group_conn(&ante_rest) {
323        Ok(c) => c,
324        Err(span) => {
325            return fail_at(
326                span,
327                "don't mix AND and OR in one WHEN group — split it into separate premises",
328            );
329        }
330    };
331
332    let (input, _) = space0(input)?;
333    let at = input;
334    let (input, _) = promote(
335        tag(kw::THEN).parse(input),
336        at,
337        "expected THEN to complete the WHEN ... THEN implication",
338    )?;
339    let at = input;
340    let (input, then) = promote(
341        preceded(space1, literal).parse(input),
342        at,
343        "THEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
344    )?;
345    let (input, _) = promote(eol(input), input, "unexpected text after the THEN literal")?;
346    let (input, cons_rest) = many0(cont_line).parse(input)?;
347    let cons_conn = match group_conn(&cons_rest) {
348        Ok(c) => c,
349        Err(span) => {
350            return fail_at(
351                span,
352                "don't mix AND and OR in one THEN group — split it into separate premises",
353            );
354        }
355    };
356
357    let mut antecedent = vec![when];
358    antecedent.extend(ante_rest.into_iter().map(|(_, l)| l));
359    let mut consequent = vec![then];
360    consequent.extend(cons_rest.into_iter().map(|(_, l)| l));
361    Ok((
362        input,
363        Body::Impl {
364            antecedent,
365            ante_conn,
366            consequent,
367            cons_conn,
368        },
369    ))
370}
371
372// --- Statements ------------------------------------------------------------
373
374/// `IMPORT "<path>" [AS <alias>]` — a quoted path, optionally bound to a local
375/// domain alias, on one line.
376fn stmt_import<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
377    let (input, _) = (tag(kw::IMPORT), space1).parse(input)?;
378    let start = input;
379    let (input, path) = promote(
380        delimited(char('"'), take_while(|c| c != '"' && c != '\n'), char('"')).parse(input),
381        start,
382        "IMPORT expects a quoted path, e.g. IMPORT \"physics.vrf\"",
383    )?;
384    // Optional `AS <alias>`: a local name for the imported domain.
385    let (input, alias) = opt(preceded((space1, tag(kw::AS), space1), identifier)).parse(input)?;
386    let (input, _) = promote(
387        eol(input),
388        input,
389        "unexpected text after the IMPORT path (did you mean AS <alias>?)",
390    )?;
391    Ok((
392        input,
393        Statement::Import {
394            path: Located {
395                data: *path.fragment(),
396                span: start,
397            },
398            alias,
399        },
400    ))
401}
402
403/// `DOMAIN <name>` — declare this file's domain on one line.
404fn stmt_domain<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
405    let (input, _) = (tag(kw::DOMAIN), space1).parse(input)?;
406    let at = input;
407    let (input, name) = promote(
408        identifier(input),
409        at,
410        "DOMAIN expects a name (a lowercase identifier), e.g. DOMAIN physics",
411    )?;
412    let (input, _) = promote(eol(input), input, "unexpected text after the DOMAIN name")?;
413    Ok((input, Statement::Domain(name)))
414}
415
416/// `FACT <atom>` — a TRUE assertion.
417fn stmt_fact<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
418    let (input, _) = (tag(kw::FACT), space1).parse(input)?;
419    let at = input;
420    let (input, a) = promote(
421        atom(input),
422        at,
423        "FACT expects an atom: <Subject> <predicate> [<object>]",
424    )?;
425    let (input, _) = promote(eol(input), input, "unexpected text after the FACT atom")?;
426    Ok((input, Statement::Fact(a)))
427}
428
429/// `ASSUME [NOT] <atom>` — a soft (retractable) assertion. Accepts a leading
430/// `NOT` (like a `WHEN`/`THEN` literal), so `ASSUME NOT x a` is FALSE-by-default.
431fn stmt_assume<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
432    let (input, _) = (tag(kw::ASSUME), space1).parse(input)?;
433    let at = input;
434    let (input, lit) = promote(
435        literal(input),
436        at,
437        "ASSUME expects an atom: [NOT] <Subject> <predicate> [<object>]",
438    )?;
439    let (input, _) = promote(eol(input), input, "unexpected text after the ASSUME atom")?;
440    Ok((input, Statement::Assume(lit)))
441}
442
443/// `NOT <atom>` — a FALSE assertion. Tried last among statements so a body-level
444/// `NOT` literal is never mistaken for a top-level negation.
445fn stmt_negation<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
446    let (input, _) = (tag(kw::NOT), space1).parse(input)?;
447    let at = input;
448    let (input, a) = promote(
449        atom(input),
450        at,
451        "NOT expects an atom: <Subject> <predicate> [<object>]",
452    )?;
453    let (input, _) = promote(eol(input), input, "unexpected text after the NOT atom")?;
454    Ok((input, Statement::Negation(a)))
455}
456
457/// `CHECK [<subject>] [BIDIRECTIONAL]` — both modifiers optional.
458fn stmt_check<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
459    let (input, _) = tag(kw::CHECK).parse(input)?;
460    let (input, subject) = opt(preceded(space1, identifier)).parse(input)?;
461    let (input, bidir) = opt(preceded(space1, tag(kw::BIDIRECTIONAL))).parse(input)?;
462    let (input, _) = eol(input)?;
463    Ok((
464        input,
465        Statement::Check {
466            subject,
467            bidirectional: bidir.is_some(),
468        },
469    ))
470}
471
472/// `SET <name>` then one identifier per line (at least one) — declare a finite
473/// set to quantify a `PREMISE`/`RULE` over via `FOR EACH <binder> IN <name>`.
474fn stmt_set<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
475    let (input, _) = (tag(kw::SET), space1).parse(input)?;
476    let at = input;
477    let (input, name) = promote(
478        identifier(input),
479        at,
480        "SET expects a name (a lowercase identifier), e.g. SET tasks",
481    )?;
482    let (input, _) = promote(eol(input), input, "unexpected text after the SET name")?;
483    let at = input;
484    let (input, first) = promote(
485        element_line(input),
486        at,
487        "a SET needs at least one element — one identifier per line",
488    )?;
489    let (input, rest) = many0(element_line).parse(input)?;
490    let mut elements = vec![first];
491    elements.extend(rest);
492    Ok((input, Statement::Set { name, elements }))
493}
494
495/// `VAR <name> [DEFAULT true|false]` — declare an external boolean port on one
496/// line. The optional `DEFAULT` gives the fallback when no value is supplied.
497fn stmt_var<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
498    let (input, _) = (tag(kw::VAR), space1).parse(input)?;
499    let at = input;
500    let (input, name) = promote(
501        identifier(input),
502        at,
503        "VAR expects a name (a lowercase identifier), e.g. VAR db_ready",
504    )?;
505    // Optional `DEFAULT true|false`. Once DEFAULT matches, a bad value commits.
506    let (input, has_default) = opt(preceded(space1, tag(kw::DEFAULT))).parse(input)?;
507    let (input, default) = if has_default.is_some() {
508        let (input, _) = promote(
509            space1(input),
510            input,
511            "DEFAULT expects a value: DEFAULT true|false",
512        )?;
513        let at = input;
514        let (input, v) = promote(bool_lit(input), at, "DEFAULT expects true or false")?;
515        (input, Some(v))
516    } else {
517        (input, None)
518    };
519    let (input, _) = promote(
520        eol(input),
521        input,
522        "unexpected text after the VAR declaration",
523    )?;
524    Ok((input, Statement::Var { name, default }))
525}
526
527/// `PROVIDE <name>: true|false` — bind a `VAR` port's value on one line.
528fn stmt_provide<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
529    let (input, _) = (tag(kw::PROVIDE), space1).parse(input)?;
530    let at = input;
531    let (input, name) = promote(
532        identifier(input),
533        at,
534        "PROVIDE expects a port name, e.g. PROVIDE db_ready: true",
535    )?;
536    let (input, _) = space0(input)?;
537    let (input, _) = promote(
538        char(':').parse(input),
539        input,
540        "PROVIDE expects ':' then a value: PROVIDE <name>: true|false",
541    )?;
542    let (input, _) = space0(input)?;
543    let at = input;
544    let (input, value) = promote(bool_lit(input), at, "PROVIDE expects true or false")?;
545    let (input, _) = promote(eol(input), input, "unexpected text after the PROVIDE value")?;
546    Ok((input, Statement::Provide { name, value }))
547}
548
549/// `CLOSE <relation> TRANSITIVE` — close a relation's FACT pairs at compile time.
550fn stmt_close<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
551    let (input, _) = (tag(kw::CLOSE), space1).parse(input)?;
552    let at = input;
553    let (input, relation) = promote(
554        identifier(input),
555        at,
556        "CLOSE expects a relation name, e.g. CLOSE depends_on TRANSITIVE",
557    )?;
558    let (input, _) = promote(
559        (space1, tag(kw::TRANSITIVE)).parse(input),
560        input,
561        "CLOSE expects a closure kind: CLOSE <relation> TRANSITIVE",
562    )?;
563    let (input, _) = promote(
564        eol(input),
565        input,
566        "unexpected text after 'CLOSE … TRANSITIVE'",
567    )?;
568    Ok((
569        input,
570        Statement::Close {
571            relation,
572            kind: CloseKind::Transitive,
573        },
574    ))
575}
576
577/// The optional quantifier tail on a `PREMISE`/`RULE` header (between the name
578/// and the `:`). One of two forms:
579///   `FOR EACH <binder> IN <set>`         — over a declared SET, or
580///   `FOR EACH <left> <relation> <right>` — over the declared FACT pairs.
581/// There is exactly one quantifier and no production for a second, so nesting is
582/// unrepresentable.
583fn for_each<'a>(input: Span<'a>) -> PResult<'a, Quant<'a>> {
584    // No FOR → recoverable Error so `opt` yields None and the `:` is parsed next.
585    let (input, _) = tag(kw::FOR).parse(input)?;
586    // Committed: FOR can only begin a quantifier here.
587    let at = input;
588    let (input, _) = promote(
589        (space1, tag(kw::EACH), space1).parse(input),
590        at,
591        "FOR must be followed by EACH: FOR EACH <binder> IN <set>  (or  FOR EACH <a> <relation> <b>)",
592    )?;
593    let at = input;
594    let (input, first) = promote(
595        identifier(input),
596        at,
597        "expected a binder name after FOR EACH",
598    )?;
599    let (input, _) = promote(
600        space1(input),
601        input,
602        "expected `IN <set>` or a relation `<rel> <binder>` after the FOR EACH binder",
603    )?;
604    // `IN <set>` → set quantifier; otherwise `<predicate> <right>` → relation.
605    let after_in: PResult<'a, (Span<'a>, Span<'a>)> = (tag(kw::IN), space1).parse(input);
606    if let Ok((rest, _)) = after_in {
607        let at = rest;
608        let (rest, set) = promote(identifier(rest), at, "expected a set name after IN")?;
609        return Ok((rest, Quant::InSet { binder: first, set }));
610    }
611    let at = input;
612    let (input, predicate) = promote(
613        identifier(input),
614        at,
615        "expected a relation name (FOR EACH <a> <relation> <b>)",
616    )?;
617    let (input, _) = promote(
618        space1(input),
619        input,
620        "expected the second binder after the relation (FOR EACH <a> <relation> <b>)",
621    )?;
622    let at = input;
623    let (input, right) = promote(
624        identifier(input),
625        at,
626        "expected the second binder (FOR EACH <a> <relation> <b>)",
627    )?;
628    Ok((
629        input,
630        Quant::Relation {
631            left: first,
632            predicate,
633            right,
634        },
635    ))
636}
637
638/// `PREMISE <name> [FOR EACH …]: <body>` where the body is a list or an implication.
639/// Parse the `<name> [FOR EACH …]:` header shared by PREMISE and RULE, after the
640/// keyword has been consumed. The structure — the name, the single optional
641/// quantifier, the colon, and the end of line — lives here once; each keyword
642/// passes its own three diagnostics so the wording stays specific.
643fn named_header<'a>(
644    input: Span<'a>,
645    name_msg: &'static str,
646    colon_msg: &'static str,
647    tail_msg: &'static str,
648) -> PResult<'a, (Located<'a, &'a str>, Option<Quant<'a>>)> {
649    let at = input;
650    let (input, name) = promote(identifier(input), at, name_msg)?;
651    let (input, quant) = opt(preceded(space1, for_each)).parse(input)?;
652    let (input, _) = space0(input)?;
653    let (input, _) = promote(char(':').parse(input), input, colon_msg)?;
654    let (input, _) = promote(eol(input), input, tail_msg)?;
655    Ok((input, (name, quant)))
656}
657
658fn stmt_premise<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
659    let (input, _) = (tag(kw::PREMISE), space1).parse(input)?;
660    // Committed to a premise now.
661    let (input, (name, quant)) = named_header(
662        input,
663        "expected a premise name (a lowercase identifier)",
664        "expected ':' after the premise name",
665        "unexpected text after 'PREMISE <name>:'",
666    )?;
667    let at = input;
668    let (input, body) = promote(
669        alt((list_body, impl_body)).parse(input),
670        at,
671        "a premise body must be a list (EXCLUSIVE/FORBIDS/ONEOF/ATLEAST) or WHEN ... THEN",
672    )?;
673    Ok((input, Statement::Premise { name, quant, body }))
674}
675
676/// `RULE <name> [FOR EACH …]: <implication>` — like a premise but the body must
677/// be `WHEN … THEN`.
678fn stmt_rule<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
679    let (input, _) = (tag(kw::RULE), space1).parse(input)?;
680    let (input, (name, quant)) = named_header(
681        input,
682        "expected a rule name (a lowercase identifier)",
683        "expected ':' after the rule name",
684        "unexpected text after 'RULE <name>:'",
685    )?;
686    let at = input;
687    let (input, body) = promote(impl_body(input), at, "a rule body must be WHEN ... THEN")?;
688    Ok((input, Statement::Rule { name, quant, body }))
689}
690
691/// One top-level statement. Order matters: each branch commits on its keyword,
692/// and `stmt_negation` comes last so a leading `NOT` is only read as a top-level
693/// negation when nothing else matched.
694fn statement<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
695    // Leading indentation is cosmetic everywhere, including on the top-level
696    // keyword line — so a whole program may be written indented (e.g. inside a
697    // host's here-doc) and parse identically.
698    let (input, _) = space0(input)?;
699    alt((
700        stmt_domain,
701        stmt_import,
702        stmt_set,
703        stmt_close,
704        stmt_var,
705        stmt_provide,
706        stmt_fact,
707        stmt_assume,
708        stmt_premise,
709        stmt_rule,
710        stmt_check,
711        stmt_negation,
712    ))
713    .parse(input)
714}
715
716// --- Recovering driver -----------------------------------------------------
717
718/// Message for a line that begins with no known top-level keyword. The list of
719/// statements is built from [`keywords::top_level_menu`], so it always names
720/// exactly the top-level keywords that exist — no hand-kept copy to drift.
721fn not_a_statement() -> String {
722    alloc::format!(
723        "expected a statement — a line must start with {}",
724        crate::keywords::top_level_menu()
725    )
726}
727
728/// Parse a full `.vrf` source into a [`Program`], collecting *every* syntax error.
729///
730/// On the first failing statement the parser does not give up: it records a
731/// [`Diagnostic`], resynchronises to the next top-level keyword line, and
732/// continues. A clean parse returns the [`Program`]; otherwise it returns all
733/// errors as [`Diagnostics`], whose `Display` renders each as a caret block with
734/// the keyword's correct syntax.
735pub fn parse(src: &str) -> Result<Program<'_>, Diagnostics> {
736    let mut input = Span::new(src);
737    let mut statements = Vec::new();
738    let mut errors: Vec<Diagnostic> = Vec::new();
739
740    loop {
741        if let Ok((rest, _)) = skip_noise(input) {
742            input = rest;
743        }
744        // Only a whitespace / trailing-comment tail remains — a clean end, not a
745        // statement (the `r#"..."#` fixtures end with an indented closing line).
746        if at_end(input.fragment()) {
747            break;
748        }
749        match statement(input) {
750            Ok((rest, stmt)) => {
751                statements.push(stmt);
752                input = rest;
753            }
754            // Committed: a keyword matched but the rest is wrong → precise message.
755            Err(nom::Err::Failure(p)) => {
756                errors.push(make_diag(src, p.input, p.message, false));
757                input = resync(p.input);
758            }
759            // Recoverable: this line started no statement keyword at all.
760            Err(nom::Err::Error(p)) => {
761                errors.push(make_diag(src, p.input, not_a_statement(), true));
762                input = resync(p.input);
763            }
764            // `complete` combinators never return Incomplete; stop defensively.
765            Err(nom::Err::Incomplete(_)) => break,
766        }
767    }
768
769    if errors.is_empty() {
770        Ok(Program { statements })
771    } else {
772        Err(Diagnostics { file: None, errors })
773    }
774}
775
776/// Whether only an ignorable tail remains: pure whitespace, or a trailing
777/// `//` comment with no following newline. `skip_noise` has already consumed any
778/// full blank/comment lines, so anything else is a real statement to parse.
779fn at_end(frag: &str) -> bool {
780    let t = frag.trim_start();
781    t.is_empty() || (t.starts_with("//") && !t.contains('\n'))
782}
783
784/// Build one [`Diagnostic`] from the failure span. `general` marks a
785/// not-tied-to-a-keyword error (shows the menu of top-level forms instead of a
786/// single syntax card).
787fn make_diag(src: &str, at: Span<'_>, message: String, general: bool) -> Diagnostic {
788    let line = at.location_line() as usize;
789    let col = at.get_column();
790    let line_text = src.lines().nth(line.saturating_sub(1)).unwrap_or("");
791    // The card follows the *message* (which names the construct in question),
792    // not the line the parser stalled on — a stall often lands on the *next*
793    // line (e.g. "expected THEN …" points at the CHECK after a bodyless WHEN),
794    // whose leading word would be a misleading card.
795    let keyword = if general { None } else { keyword_in(&message) };
796    Diagnostic {
797        line,
798        col,
799        width: caret_width(line_text, col),
800        message,
801        keyword,
802        general,
803        line_text: line_text.to_string(),
804    }
805}
806
807/// Caret length: from `col` to the last non-whitespace character of the line (at
808/// least one), so the underline covers the offending token / trailing text.
809fn caret_width(line_text: &str, col: usize) -> usize {
810    let start = col.saturating_sub(1);
811    let trimmed_len = line_text.trim_end().chars().count();
812    trimmed_len.saturating_sub(start).max(1)
813}
814
815/// Resynchronise after an error: skip the rest of the offending line, then any
816/// lines until one begins (after cosmetic indentation) with a top-level keyword,
817/// or EOF. This keeps a broken PREMISE body from cascading into spurious errors.
818fn resync(at: Span<'_>) -> Span<'_> {
819    let mut input = consume_line(at);
820    loop {
821        if input.fragment().is_empty() || starts_top_level(input) {
822            return input;
823        }
824        input = consume_line(input);
825    }
826}
827
828/// Consume through the next line ending (or to EOF if none remains). The
829/// `complete` combinators never fail here, so the `unwrap_or` is unreachable.
830fn consume_line(input: Span<'_>) -> Span<'_> {
831    let parsed: PResult<'_, Span<'_>> =
832        recognize((take_while(|c| c != '\n' && c != '\r'), opt(line_ending))).parse(input);
833    parsed.map(|(rest, _)| rest).unwrap_or(input)
834}
835
836/// Whether `input` begins (after cosmetic indentation) with a top-level keyword.
837fn starts_top_level(input: Span<'_>) -> bool {
838    let after = match space0::<_, Problem<'_>>(input) {
839        Ok((rest, _)) => rest,
840        Err(_) => return false,
841    };
842    let word: String = after
843        .fragment()
844        .chars()
845        .take_while(|c| c.is_ascii_uppercase())
846        .collect();
847    is_top_level(&word)
848}