Skip to main content

elenchus_parser/
grammar.rs

1//! The nom grammar and the recovering top-level driver.
2//!
3//! The per-statement parsers (`atom`, `literal`, `stmt_*`, `impl_body`,
4//! `list_body`) are ordinary `nom` combinators over a located [`Span`]; once a
5//! statement commits on its leading keyword, failures are [`promote`]d to a
6//! `Failure` carrying a precise message ([`Problem`]). The driver [`parse`] does
7//! not stop at the first failure: it records a [`Diagnostic`], resynchronises to
8//! the next top-level keyword line, and keeps going, so one run reports *every*
9//! syntax error.
10
11use alloc::string::{String, ToString};
12use alloc::vec;
13use alloc::vec::Vec;
14
15use nom::{
16    IResult, Parser,
17    branch::alt,
18    bytes::complete::{tag, take_while},
19    character::complete::{char, line_ending, satisfy, space0, space1},
20    combinator::{eof, opt, recognize, value},
21    multi::many0,
22    sequence::{delimited, preceded, terminated},
23};
24
25use crate::ast::{
26    Atom, Body, CloseKind, Conn, ListOp, Literal, Located, Program, Quant, Span, Statement,
27};
28use crate::diag::{Diagnostic, Diagnostics};
29use crate::keywords::{is_reserved, is_top_level, keyword_in, kw};
30
31// --- Parser primitives -----------------------------------------------------
32
33/// Internal nom error carrying a human message and the precise failure span.
34/// Recoverable failures are `nom::Err::Error`; once a statement has committed
35/// (its leading keyword is recognized), failures are promoted to
36/// `nom::Err::Failure` so the error points at the *real* problem with a specific
37/// message instead of backtracking to a generic "expected a statement".
38#[derive(Debug, Clone)]
39struct Problem<'a> {
40    input: Span<'a>,
41    message: String,
42}
43
44impl<'a> nom::error::ParseError<Span<'a>> for Problem<'a> {
45    fn from_error_kind(input: Span<'a>, _: nom::error::ErrorKind) -> Self {
46        Problem {
47            input,
48            message: String::from("unexpected token"),
49        }
50    }
51    fn append(_: Span<'a>, _: nom::error::ErrorKind, other: Self) -> Self {
52        other
53    }
54}
55
56/// nom result specialized to our [`Problem`] error and located `Span` input.
57type PResult<'a, T> = IResult<Span<'a>, T, Problem<'a>>;
58
59/// Turn a recoverable `Error` into a committed `Failure` at `at` with `msg`.
60/// Already-`Failure` (a deeper, more specific message) and `Ok` pass through.
61fn promote<'a, T>(r: PResult<'a, T>, at: Span<'a>, msg: &str) -> PResult<'a, T> {
62    match r {
63        Err(nom::Err::Error(_)) => Err(nom::Err::Failure(Problem {
64            input: at,
65            message: String::from(msg),
66        })),
67        other => other,
68    }
69}
70
71/// A plain recoverable `Error` at `input` (lets an enclosing `alt` backtrack).
72fn perr<'a, T>(input: Span<'a>) -> PResult<'a, T> {
73    Err(nom::Err::Error(Problem {
74        input,
75        message: String::from("unexpected token"),
76    }))
77}
78
79/// Characters allowed *after* the first in an identifier. Letters and digits of
80/// *any* script are accepted (Unicode `is_alphanumeric`), so `условие` or `名前`
81/// are valid; `_` joins multi-word names. `.` is **not** an identifier character:
82/// it is the domain separator (`physics.engine`), so use `_` for compound
83/// subjects (`Creature_A`). Punctuation and other symbols are rejected.
84fn is_ident_char(c: char) -> bool {
85    c.is_alphanumeric() || c == '_'
86}
87
88/// A bare identifier (does not reject reserved words). The first character must
89/// be a letter of any script (`is_alphabetic`) — never a digit, `_`, `.`, or
90/// punctuation — so identifiers stay distinct from numbers and operators.
91fn raw_identifier<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
92    recognize((satisfy(|c| c.is_alphabetic()), take_while(is_ident_char))).parse(input)
93}
94
95/// An identifier that is not a reserved keyword.
96fn identifier<'a>(input: Span<'a>) -> PResult<'a, Located<'a, &'a str>> {
97    let start = input;
98    let (rest, sp) = raw_identifier(input)?;
99    if is_reserved(sp.fragment()) {
100        return perr(start);
101    }
102    Ok((
103        rest,
104        Located {
105            data: *sp.fragment(),
106            span: start,
107        },
108    ))
109}
110
111/// A line comment `// ... ` up to (but not including) the line ending.
112fn comment<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
113    recognize((tag("//"), take_while(|c| c != '\n' && c != '\r'))).parse(input)
114}
115
116/// End of a statement line: trailing spaces, an optional trailing comment,
117/// then a line ending or EOF.
118fn eol<'a>(input: Span<'a>) -> PResult<'a, ()> {
119    value((), (space0, opt(comment), alt((line_ending, eof)))).parse(input)
120}
121
122/// A blank line or a full-line comment (must consume a line ending to progress).
123fn noise_line<'a>(input: Span<'a>) -> PResult<'a, ()> {
124    value((), (space0, opt(comment), line_ending)).parse(input)
125}
126
127/// Skip any number of blank / comment lines between statements.
128fn skip_noise<'a>(input: Span<'a>) -> PResult<'a, ()> {
129    value((), many0(noise_line)).parse(input)
130}
131
132// --- Atoms & literals ------------------------------------------------------
133
134/// `[<domain>.]<subject> [<predicate> [<object>]]` — an atom, optionally qualified
135/// by a `domain.` prefix on the subject, then one to three space-separated
136/// identifiers. The domain is recognised only when an identifier is immediately
137/// followed by `.` (no space), so a bare `engine has_fuel` keeps `engine` as the
138/// subject. A lone identifier (`db_ready`) is a **bare proposition** — predicate
139/// and object both `None`; the compiler requires it to be a declared `VAR`.
140fn atom<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
141    let start = input;
142    let (input, domain) = opt(terminated(identifier, char('.'))).parse(input)?;
143    let (input, subject) = identifier(input)?;
144    let (input, predicate) = opt(preceded(space1, identifier)).parse(input)?;
145    // An object can only follow a predicate; a bare proposition has neither.
146    let (input, object) = match predicate {
147        Some(_) => opt(preceded(space1, identifier)).parse(input)?,
148        None => (input, None),
149    };
150    Ok((
151        input,
152        Located {
153            data: Atom {
154                domain: domain.map(|d| d.data),
155                subject: subject.data,
156                predicate: predicate.map(|p| p.data),
157                object: object.map(|o| o.data),
158            },
159            span: start,
160        },
161    ))
162}
163
164/// An optionally `NOT`-prefixed [`atom`] — a literal inside a `WHEN`/`THEN` body.
165fn literal<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Literal<'a>>> {
166    let start = input;
167    let (input, neg) = opt(terminated(tag(kw::NOT), space1)).parse(input)?;
168    let (input, a) = atom(input)?;
169    Ok((
170        input,
171        Located {
172            data: Literal {
173                negated: neg.is_some(),
174                atom: a.data,
175            },
176            span: start,
177        },
178    ))
179}
180
181/// A boolean value word `true` or `false` — the value of a `VAR` `DEFAULT` (and,
182/// later, a `PROVIDE`). Parsed through the identifier tokenizer and matched
183/// exactly, so `true`/`false` are not reserved and stay usable as ordinary atom
184/// words; only in value position do they read as booleans.
185fn bool_lit<'a>(input: Span<'a>) -> PResult<'a, bool> {
186    let (rest, sp) = raw_identifier(input)?;
187    match *sp.fragment() {
188        "true" => Ok((rest, true)),
189        "false" => Ok((rest, false)),
190        _ => perr(input),
191    }
192}
193
194/// An atom on its own (possibly indented) line: used inside list bodies.
195fn atom_line<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
196    let (input, _) = space0(input)?;
197    let (input, a) = atom(input)?;
198    let (input, _) = eol(input)?;
199    Ok((input, a))
200}
201
202/// A single identifier on its own (possibly indented) line: a `SET` element. A
203/// reserved word or a non-identifier line yields a recoverable `Error` so `many0`
204/// stops cleanly at the next statement.
205fn element_line<'a>(input: Span<'a>) -> PResult<'a, Located<'a, &'a str>> {
206    let (input, _) = space0(input)?;
207    let (input, id) = identifier(input)?;
208    let (input, _) = eol(input)?;
209    Ok((input, id))
210}
211
212// --- Bodies ----------------------------------------------------------------
213
214/// One of the list-constraint keywords (`EXCLUSIVE`/`FORBIDS`/`ONEOF`/`ATLEAST`).
215fn list_op<'a>(input: Span<'a>) -> PResult<'a, ListOp> {
216    alt((
217        value(ListOp::Exclusive, tag(kw::EXCLUSIVE)),
218        value(ListOp::Forbids, tag(kw::FORBIDS)),
219        value(ListOp::OneOf, tag(kw::ONEOF)),
220        value(ListOp::AtLeast, tag(kw::ATLEAST)),
221    ))
222    .parse(input)
223}
224
225/// A list-constraint body: a list operator on its own line, then one atom per
226/// line (at least two).
227///
228/// Commit strategy: the leading `list_op` failing stays a recoverable `Error`
229/// so the `PREMISE` `alt` can fall through and try [`impl_body`] instead. *Once*
230/// the operator matched we are committed, so every subsequent failure is
231/// [`promote`]d to a `Failure` with a specific message — no backtracking to a
232/// generic "expected a statement".
233/// The diagnostic for a list body with fewer than the required two atoms — one
234/// spelling, used for both the missing-first and missing-second slot.
235const LIST_NEEDS_TWO_ATOMS: &str = "a list premise needs at least two atoms";
236
237fn list_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
238    let (input, _) = space0(input)?;
239    // list_op failing stays Error so the PREMISE alt can try impl_body.
240    let (input, op) = list_op(input)?;
241    // Past here we are committed to a list body.
242    let (input, _) = promote(
243        eol(input),
244        input,
245        "expected a newline after the list operator",
246    )?;
247    let at = input;
248    let (input, first) = promote(atom_line(input), at, LIST_NEEDS_TWO_ATOMS)?;
249    let at = input;
250    let (input, second) = promote(atom_line(input), at, LIST_NEEDS_TWO_ATOMS)?;
251    let (input, rest) = many0(atom_line).parse(input)?;
252
253    let mut atoms = vec![first, second];
254    atoms.extend(rest);
255    Ok((input, Body::List { op, atoms }))
256}
257
258/// An `EXISTS <binder> IN <set>` body: the existential dual of a `FOR EACH … IN`
259/// (which is an "all"). One condition line carrying the binder follows. Desugars
260/// to an at-least-one over the per-element instantiations.
261///
262/// Commit strategy mirrors [`list_body`]: a missing leading `EXISTS` stays a
263/// recoverable `Error` so the `PREMISE` `alt` falls through to [`impl_body`];
264/// once `EXISTS` matched, every later failure is [`promote`]d to a precise message.
265fn exists_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
266    let (input, _) = space0(input)?;
267    // EXISTS failing stays Error so the PREMISE alt can try impl_body.
268    let (input, _) = tag(kw::EXISTS).parse(input)?;
269    // Past here we are committed to an exists body.
270    let (input, _) = promote(
271        space1(input),
272        input,
273        "EXISTS expects a binder: EXISTS <binder> IN <set>",
274    )?;
275    let at = input;
276    let (input, binder) = promote(
277        identifier(input),
278        at,
279        "EXISTS expects a binder: EXISTS <binder> IN <set>",
280    )?;
281    let (input, _) = promote(
282        (space1, tag(kw::IN), space1).parse(input),
283        input,
284        "EXISTS expects `IN <set>`: EXISTS <binder> IN <set>",
285    )?;
286    let at = input;
287    let (input, set) = promote(identifier(input), at, "EXISTS expects a set name after IN")?;
288    let (input, _) = promote(
289        eol(input),
290        input,
291        "unexpected text after 'EXISTS <binder> IN <set>'",
292    )?;
293    let at = input;
294    let (input, atom) = promote(
295        atom_line(input),
296        at,
297        "EXISTS needs a condition line using the binder",
298    )?;
299    Ok((input, Body::Exists { binder, set, atom }))
300}
301
302/// A continuation `AND <literal>` / `OR <literal>` line inside a `WHEN`/`THEN`
303/// block. Returns `(Conn, literal)`. A line that is neither `AND` nor `OR` yields
304/// a recoverable `Error` so `many0` stops cleanly (e.g. at `THEN`/EOF).
305fn cont_line<'a>(input: Span<'a>) -> PResult<'a, (Conn, Located<'a, Literal<'a>>)> {
306    let (input, _) = space0(input)?;
307    // Not an AND/OR line → Error so many0 stops cleanly.
308    let (input, conn) =
309        alt((value(Conn::And, tag(kw::AND)), value(Conn::Or, tag(kw::OR)))).parse(input)?;
310    let (input, _) = space1(input)?;
311    let at = input;
312    let (input, lit) = promote(
313        literal(input),
314        at,
315        "AND/OR expects a literal: [NOT] <Subject> <predicate> [<object>]",
316    )?;
317    let (input, _) = promote(
318        eol(input),
319        input,
320        "unexpected text after the AND/OR literal",
321    )?;
322    Ok((input, (conn, lit)))
323}
324
325/// Reduce a group's continuation lines to a single [`Conn`], rejecting a mix of
326/// `AND` and `OR` in one group (point the error at the first line that switches).
327fn group_conn<'a>(conts: &[(Conn, Located<'a, Literal<'a>>)]) -> Result<Conn, Span<'a>> {
328    let mut seen: Option<Conn> = None;
329    for (conn, lit) in conts {
330        match seen {
331            None => seen = Some(*conn),
332            Some(s) if s != *conn => return Err(lit.span),
333            _ => {}
334        }
335    }
336    Ok(seen.unwrap_or(Conn::And))
337}
338
339/// Fail with a committed message at `at` (for in-body semantic checks).
340fn fail_at<'a, T>(at: Span<'a>, msg: &str) -> PResult<'a, T> {
341    Err(nom::Err::Failure(Problem {
342        input: at,
343        message: String::from(msg),
344    }))
345}
346
347/// An implication body: `WHEN <lit> [AND <lit>]* THEN <lit> [AND <lit>]*`.
348///
349/// Like [`list_body`], a missing leading `WHEN` stays a recoverable `Error` (so
350/// the `PREMISE` `alt` can try a list body); after `WHEN` matches we are committed
351/// and use [`promote`] for precise errors. Antecedent and consequent each start
352/// with one mandatory literal followed by zero or more `AND` lines.
353fn impl_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
354    let (input, _) = space0(input)?;
355    // No WHEN → Error so the PREMISE alt can fall through to a list body.
356    let (input, _) = (tag(kw::WHEN), space1).parse(input)?;
357    // Committed to an implication body now.
358    let at = input;
359    let (input, when) = promote(
360        literal(input),
361        at,
362        "WHEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
363    )?;
364    let (input, _) = promote(eol(input), input, "unexpected text after the WHEN literal")?;
365    let (input, ante_rest) = many0(cont_line).parse(input)?;
366    let ante_conn = match group_conn(&ante_rest) {
367        Ok(c) => c,
368        Err(span) => {
369            return fail_at(
370                span,
371                "don't mix AND and OR in one WHEN group — split it into separate premises",
372            );
373        }
374    };
375
376    let (input, _) = space0(input)?;
377    let at = input;
378    let (input, _) = promote(
379        tag(kw::THEN).parse(input),
380        at,
381        "expected THEN to complete the WHEN ... THEN implication",
382    )?;
383    let at = input;
384    let (input, then) = promote(
385        preceded(space1, literal).parse(input),
386        at,
387        "THEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
388    )?;
389    let (input, _) = promote(eol(input), input, "unexpected text after the THEN literal")?;
390    let (input, cons_rest) = many0(cont_line).parse(input)?;
391    let cons_conn = match group_conn(&cons_rest) {
392        Ok(c) => c,
393        Err(span) => {
394            return fail_at(
395                span,
396                "don't mix AND and OR in one THEN group — split it into separate premises",
397            );
398        }
399    };
400
401    let mut antecedent = vec![when];
402    antecedent.extend(ante_rest.into_iter().map(|(_, l)| l));
403    let mut consequent = vec![then];
404    consequent.extend(cons_rest.into_iter().map(|(_, l)| l));
405    Ok((
406        input,
407        Body::Impl {
408            antecedent,
409            ante_conn,
410            consequent,
411            cons_conn,
412        },
413    ))
414}
415
416// --- Statements ------------------------------------------------------------
417
418/// `IMPORT "<path>" [AS <alias>]` — a quoted path, optionally bound to a local
419/// domain alias, on one line.
420fn stmt_import<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
421    let (input, _) = (tag(kw::IMPORT), space1).parse(input)?;
422    let start = input;
423    let (input, path) = promote(
424        delimited(char('"'), take_while(|c| c != '"' && c != '\n'), char('"')).parse(input),
425        start,
426        "IMPORT expects a quoted path, e.g. IMPORT \"physics.vrf\"",
427    )?;
428    // Optional `AS <alias>`: a local name for the imported domain.
429    let (input, alias) = opt(preceded((space1, tag(kw::AS), space1), identifier)).parse(input)?;
430    let (input, _) = promote(
431        eol(input),
432        input,
433        "unexpected text after the IMPORT path (did you mean AS <alias>?)",
434    )?;
435    Ok((
436        input,
437        Statement::Import {
438            path: Located {
439                data: *path.fragment(),
440                span: start,
441            },
442            alias,
443        },
444    ))
445}
446
447/// `DOMAIN <name>` — declare this file's domain on one line.
448fn stmt_domain<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
449    let (input, _) = (tag(kw::DOMAIN), space1).parse(input)?;
450    let at = input;
451    let (input, name) = promote(
452        identifier(input),
453        at,
454        "DOMAIN expects a name (a lowercase identifier), e.g. DOMAIN physics",
455    )?;
456    let (input, _) = promote(eol(input), input, "unexpected text after the DOMAIN name")?;
457    Ok((input, Statement::Domain(name)))
458}
459
460/// `FACT <atom>` — a TRUE assertion.
461fn stmt_fact<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
462    let (input, _) = (tag(kw::FACT), space1).parse(input)?;
463    let at = input;
464    let (input, a) = promote(
465        atom(input),
466        at,
467        "FACT expects an atom: <Subject> <predicate> [<object>]",
468    )?;
469    let (input, _) = promote(eol(input), input, "unexpected text after the FACT atom")?;
470    Ok((input, Statement::Fact(a)))
471}
472
473/// `ASSUME [NOT] <atom>` — a soft (retractable) assertion. Accepts a leading
474/// `NOT` (like a `WHEN`/`THEN` literal), so `ASSUME NOT x a` is FALSE-by-default.
475fn stmt_assume<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
476    let (input, _) = (tag(kw::ASSUME), space1).parse(input)?;
477    let at = input;
478    let (input, lit) = promote(
479        literal(input),
480        at,
481        "ASSUME expects an atom: [NOT] <Subject> <predicate> [<object>]",
482    )?;
483    let (input, _) = promote(eol(input), input, "unexpected text after the ASSUME atom")?;
484    Ok((input, Statement::Assume(lit)))
485}
486
487/// `NOT <atom>` — a FALSE assertion. Tried last among statements so a body-level
488/// `NOT` literal is never mistaken for a top-level negation.
489fn stmt_negation<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
490    let (input, _) = (tag(kw::NOT), space1).parse(input)?;
491    let at = input;
492    let (input, a) = promote(
493        atom(input),
494        at,
495        "NOT expects an atom: <Subject> <predicate> [<object>]",
496    )?;
497    let (input, _) = promote(eol(input), input, "unexpected text after the NOT atom")?;
498    Ok((input, Statement::Negation(a)))
499}
500
501/// `CHECK [<subject>] [BIDIRECTIONAL]` — both modifiers optional.
502fn stmt_check<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
503    let (input, _) = tag(kw::CHECK).parse(input)?;
504    let (input, subject) = opt(preceded(space1, identifier)).parse(input)?;
505    let (input, bidir) = opt(preceded(space1, tag(kw::BIDIRECTIONAL))).parse(input)?;
506    let (input, _) = eol(input)?;
507    Ok((
508        input,
509        Statement::Check {
510            subject,
511            bidirectional: bidir.is_some(),
512        },
513    ))
514}
515
516/// `SET <name>` then one identifier per line (at least one) — declare a finite
517/// set to quantify a `PREMISE`/`RULE` over via `FOR EACH <binder> IN <name>`.
518fn stmt_set<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
519    let (input, _) = (tag(kw::SET), space1).parse(input)?;
520    let at = input;
521    let (input, name) = promote(
522        identifier(input),
523        at,
524        "SET expects a name (a lowercase identifier), e.g. SET tasks",
525    )?;
526    let (input, _) = promote(eol(input), input, "unexpected text after the SET name")?;
527    let at = input;
528    let (input, first) = promote(
529        element_line(input),
530        at,
531        "a SET needs at least one element — one identifier per line",
532    )?;
533    let (input, rest) = many0(element_line).parse(input)?;
534    let mut elements = vec![first];
535    elements.extend(rest);
536    Ok((input, Statement::Set { name, elements }))
537}
538
539/// `VAR <name> [DEFAULT true|false]` — declare an external boolean port on one
540/// line. The optional `DEFAULT` gives the fallback when no value is supplied.
541fn stmt_var<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
542    let (input, _) = (tag(kw::VAR), space1).parse(input)?;
543    let at = input;
544    let (input, name) = promote(
545        identifier(input),
546        at,
547        "VAR expects a name (a lowercase identifier), e.g. VAR db_ready",
548    )?;
549    // Optional `DEFAULT true|false`. Once DEFAULT matches, a bad value commits.
550    let (input, has_default) = opt(preceded(space1, tag(kw::DEFAULT))).parse(input)?;
551    let (input, default) = if has_default.is_some() {
552        let (input, _) = promote(
553            space1(input),
554            input,
555            "DEFAULT expects a value: DEFAULT true|false",
556        )?;
557        let at = input;
558        let (input, v) = promote(bool_lit(input), at, "DEFAULT expects true or false")?;
559        (input, Some(v))
560    } else {
561        (input, None)
562    };
563    let (input, _) = promote(
564        eol(input),
565        input,
566        "unexpected text after the VAR declaration",
567    )?;
568    Ok((input, Statement::Var { name, default }))
569}
570
571/// `PROVIDE [<domain>.]<port|atom>: true|false` — bind an external value on one
572/// line. The target reuses the [`atom`] grammar, so it accepts a lone port
573/// (`PROVIDE db_ready: true`), a multi-word atom (`PROVIDE engine has_fuel:
574/// true`), and a `domain.` prefix (`PROVIDE self.has_vision: true`).
575fn stmt_provide<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
576    let (input, _) = (tag(kw::PROVIDE), space1).parse(input)?;
577    let at = input;
578    let (input, atom) = promote(
579        atom(input),
580        at,
581        "PROVIDE expects a port or atom, e.g. PROVIDE db_ready: true",
582    )?;
583    let (input, _) = space0(input)?;
584    let (input, _) = promote(
585        char(':').parse(input),
586        input,
587        "PROVIDE expects ':' then a value: PROVIDE <port>: true|false",
588    )?;
589    let (input, _) = space0(input)?;
590    let at = input;
591    let (input, value) = promote(bool_lit(input), at, "PROVIDE expects true or false")?;
592    let (input, _) = promote(eol(input), input, "unexpected text after the PROVIDE value")?;
593    Ok((input, Statement::Provide { atom, value }))
594}
595
596/// One closure kind keyword: `TRANSITIVE`/`SYMMETRIC`/`REFLEXIVE`/`EQUIVALENCE`/`SCC`.
597fn closure_kind<'a>(input: Span<'a>) -> PResult<'a, CloseKind> {
598    alt((
599        value(CloseKind::Transitive, tag(kw::TRANSITIVE)),
600        value(CloseKind::Symmetric, tag(kw::SYMMETRIC)),
601        value(CloseKind::Reflexive, tag(kw::REFLEXIVE)),
602        value(CloseKind::Equivalence, tag(kw::EQUIVALENCE)),
603        value(CloseKind::Scc, tag(kw::SCC)),
604    ))
605    .parse(input)
606}
607
608/// `CLOSE <relation> <kind>` — close a relation's FACT pairs at compile time.
609fn stmt_close<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
610    let (input, _) = (tag(kw::CLOSE), space1).parse(input)?;
611    let at = input;
612    let (input, relation) = promote(
613        identifier(input),
614        at,
615        "CLOSE expects a relation name, e.g. CLOSE depends_on TRANSITIVE",
616    )?;
617    let (input, _) = promote(
618        space1(input),
619        input,
620        "CLOSE expects a closure kind: CLOSE <relation> TRANSITIVE|SYMMETRIC|REFLEXIVE|EQUIVALENCE|SCC",
621    )?;
622    let at = input;
623    let (input, kind) = promote(
624        closure_kind(input),
625        at,
626        "CLOSE expects a closure kind: CLOSE <relation> TRANSITIVE|SYMMETRIC|REFLEXIVE|EQUIVALENCE|SCC",
627    )?;
628    let (input, _) = promote(
629        eol(input),
630        input,
631        "unexpected text after 'CLOSE <relation> <kind>'",
632    )?;
633    Ok((input, Statement::Close { relation, kind }))
634}
635
636/// The optional quantifier tail on a `PREMISE`/`RULE` header (between the name
637/// and the `:`). One of two forms:
638///   `FOR EACH <binder> IN <set>`         — over a declared SET, or
639///   `FOR EACH <left> <relation> <right>` — over the declared FACT pairs.
640/// There is exactly one quantifier and no production for a second, so nesting is
641/// unrepresentable.
642fn for_each<'a>(input: Span<'a>) -> PResult<'a, Quant<'a>> {
643    // No FOR → recoverable Error so `opt` yields None and the `:` is parsed next.
644    let (input, _) = tag(kw::FOR).parse(input)?;
645    // Committed: FOR can only begin a quantifier here.
646    let at = input;
647    let (input, _) = promote(
648        (space1, tag(kw::EACH), space1).parse(input),
649        at,
650        "FOR must be followed by EACH: FOR EACH <binder> IN <set>  (or  FOR EACH <a> <relation> <b>)",
651    )?;
652    let at = input;
653    let (input, first) = promote(
654        identifier(input),
655        at,
656        "expected a binder name after FOR EACH",
657    )?;
658    let (input, _) = promote(
659        space1(input),
660        input,
661        "expected `IN <set>` or a relation `<rel> <binder>` after the FOR EACH binder",
662    )?;
663    // `IN <set>` → set quantifier; otherwise `<predicate> <right>` → relation.
664    let after_in: PResult<'a, (Span<'a>, Span<'a>)> = (tag(kw::IN), space1).parse(input);
665    if let Ok((rest, _)) = after_in {
666        let at = rest;
667        let (rest, set) = promote(identifier(rest), at, "expected a set name after IN")?;
668        return Ok((rest, Quant::InSet { binder: first, set }));
669    }
670    let at = input;
671    let (input, predicate) = promote(
672        identifier(input),
673        at,
674        "expected a relation name (FOR EACH <a> <relation> <b>)",
675    )?;
676    let (input, _) = promote(
677        space1(input),
678        input,
679        "expected the second binder after the relation (FOR EACH <a> <relation> <b>)",
680    )?;
681    let at = input;
682    let (input, right) = promote(
683        identifier(input),
684        at,
685        "expected the second binder (FOR EACH <a> <relation> <b>)",
686    )?;
687    Ok((
688        input,
689        Quant::Relation {
690            left: first,
691            predicate,
692            right,
693        },
694    ))
695}
696
697/// `PREMISE <name> [FOR EACH …]: <body>` where the body is a list or an implication.
698/// Parse the `<name> [FOR EACH …]:` header shared by PREMISE and RULE, after the
699/// keyword has been consumed. The structure — the name, the single optional
700/// quantifier, the colon, and the end of line — lives here once; each keyword
701/// passes its own three diagnostics so the wording stays specific.
702fn named_header<'a>(
703    input: Span<'a>,
704    name_msg: &'static str,
705    colon_msg: &'static str,
706    tail_msg: &'static str,
707) -> PResult<'a, (Located<'a, &'a str>, Option<Quant<'a>>)> {
708    let at = input;
709    let (input, name) = promote(identifier(input), at, name_msg)?;
710    let (input, quant) = opt(preceded(space1, for_each)).parse(input)?;
711    let (input, _) = space0(input)?;
712    let (input, _) = promote(char(':').parse(input), input, colon_msg)?;
713    let (input, _) = promote(eol(input), input, tail_msg)?;
714    Ok((input, (name, quant)))
715}
716
717fn stmt_premise<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
718    let (input, _) = (tag(kw::PREMISE), space1).parse(input)?;
719    // Committed to a premise now.
720    let (input, (name, quant)) = named_header(
721        input,
722        "expected a premise name (a lowercase identifier)",
723        "expected ':' after the premise name",
724        "unexpected text after 'PREMISE <name>:'",
725    )?;
726    let at = input;
727    let (input, body) = promote(
728        alt((list_body, exists_body, impl_body)).parse(input),
729        at,
730        "a premise body must be a list (EXCLUSIVE/FORBIDS/ONEOF/ATLEAST), EXISTS ... IN, or WHEN ... THEN",
731    )?;
732    Ok((input, Statement::Premise { name, quant, body }))
733}
734
735/// `RULE <name> [FOR EACH …]: <implication>` — like a premise but the body must
736/// be `WHEN … THEN`.
737fn stmt_rule<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
738    let (input, _) = (tag(kw::RULE), space1).parse(input)?;
739    let (input, (name, quant)) = named_header(
740        input,
741        "expected a rule name (a lowercase identifier)",
742        "expected ':' after the rule name",
743        "unexpected text after 'RULE <name>:'",
744    )?;
745    let at = input;
746    let (input, body) = promote(impl_body(input), at, "a rule body must be WHEN ... THEN")?;
747    Ok((input, Statement::Rule { name, quant, body }))
748}
749
750/// One top-level statement. Order matters: each branch commits on its keyword,
751/// and `stmt_negation` comes last so a leading `NOT` is only read as a top-level
752/// negation when nothing else matched.
753fn statement<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
754    // Leading indentation is cosmetic everywhere, including on the top-level
755    // keyword line — so a whole program may be written indented (e.g. inside a
756    // host's here-doc) and parse identically.
757    let (input, _) = space0(input)?;
758    alt((
759        stmt_domain,
760        stmt_import,
761        stmt_set,
762        stmt_close,
763        stmt_var,
764        stmt_provide,
765        stmt_fact,
766        stmt_assume,
767        stmt_premise,
768        stmt_rule,
769        stmt_check,
770        stmt_negation,
771    ))
772    .parse(input)
773}
774
775// --- Recovering driver -----------------------------------------------------
776
777/// Message for a line that begins with no known top-level keyword. The list of
778/// statements is built from [`keywords::top_level_menu`], so it always names
779/// exactly the top-level keywords that exist — no hand-kept copy to drift.
780fn not_a_statement() -> String {
781    alloc::format!(
782        "expected a statement — a line must start with {}",
783        crate::keywords::top_level_menu()
784    )
785}
786
787/// Parse a full `.vrf` source into a [`Program`], collecting *every* syntax error.
788///
789/// On the first failing statement the parser does not give up: it records a
790/// [`Diagnostic`], resynchronises to the next top-level keyword line, and
791/// continues. A clean parse returns the [`Program`]; otherwise it returns all
792/// errors as [`Diagnostics`], whose `Display` renders each as a caret block with
793/// the keyword's correct syntax.
794pub fn parse(src: &str) -> Result<Program<'_>, Diagnostics> {
795    let mut input = Span::new(src);
796    let mut statements = Vec::new();
797    let mut errors: Vec<Diagnostic> = Vec::new();
798
799    loop {
800        if let Ok((rest, _)) = skip_noise(input) {
801            input = rest;
802        }
803        // Only a whitespace / trailing-comment tail remains — a clean end, not a
804        // statement (the `r#"..."#` fixtures end with an indented closing line).
805        if at_end(input.fragment()) {
806            break;
807        }
808        match statement(input) {
809            Ok((rest, stmt)) => {
810                statements.push(stmt);
811                input = rest;
812            }
813            // Committed: a keyword matched but the rest is wrong → precise message.
814            Err(nom::Err::Failure(p)) => {
815                errors.push(make_diag(src, p.input, p.message, false));
816                input = resync(p.input);
817            }
818            // Recoverable: this line started no statement keyword at all.
819            Err(nom::Err::Error(p)) => {
820                errors.push(make_diag(src, p.input, not_a_statement(), true));
821                input = resync(p.input);
822            }
823            // `complete` combinators never return Incomplete; stop defensively.
824            Err(nom::Err::Incomplete(_)) => break,
825        }
826    }
827
828    if errors.is_empty() {
829        Ok(Program { statements })
830    } else {
831        Err(Diagnostics { file: None, errors })
832    }
833}
834
835/// Whether only an ignorable tail remains: pure whitespace, or a trailing
836/// `//` comment with no following newline. `skip_noise` has already consumed any
837/// full blank/comment lines, so anything else is a real statement to parse.
838fn at_end(frag: &str) -> bool {
839    let t = frag.trim_start();
840    t.is_empty() || (t.starts_with("//") && !t.contains('\n'))
841}
842
843/// Build one [`Diagnostic`] from the failure span. `general` marks a
844/// not-tied-to-a-keyword error (shows the menu of top-level forms instead of a
845/// single syntax card).
846fn make_diag(src: &str, at: Span<'_>, message: String, general: bool) -> Diagnostic {
847    let line = at.location_line() as usize;
848    let col = at.get_column();
849    let line_text = src.lines().nth(line.saturating_sub(1)).unwrap_or("");
850    // The card follows the *message* (which names the construct in question),
851    // not the line the parser stalled on — a stall often lands on the *next*
852    // line (e.g. "expected THEN …" points at the CHECK after a bodyless WHEN),
853    // whose leading word would be a misleading card.
854    let keyword = if general { None } else { keyword_in(&message) };
855    Diagnostic {
856        line,
857        col,
858        width: caret_width(line_text, col),
859        message,
860        keyword,
861        general,
862        line_text: line_text.to_string(),
863    }
864}
865
866/// Caret length: from `col` to the last non-whitespace character of the line (at
867/// least one), so the underline covers the offending token / trailing text.
868fn caret_width(line_text: &str, col: usize) -> usize {
869    let start = col.saturating_sub(1);
870    let trimmed_len = line_text.trim_end().chars().count();
871    trimmed_len.saturating_sub(start).max(1)
872}
873
874/// Resynchronise after an error: skip the rest of the offending line, then any
875/// lines until one begins (after cosmetic indentation) with a top-level keyword,
876/// or EOF. This keeps a broken PREMISE body from cascading into spurious errors.
877fn resync(at: Span<'_>) -> Span<'_> {
878    let mut input = consume_line(at);
879    loop {
880        if input.fragment().is_empty() || starts_top_level(input) {
881            return input;
882        }
883        input = consume_line(input);
884    }
885}
886
887/// Consume through the next line ending (or to EOF if none remains). The
888/// `complete` combinators never fail here, so the `unwrap_or` is unreachable.
889fn consume_line(input: Span<'_>) -> Span<'_> {
890    let parsed: PResult<'_, Span<'_>> =
891        recognize((take_while(|c| c != '\n' && c != '\r'), opt(line_ending))).parse(input);
892    parsed.map(|(rest, _)| rest).unwrap_or(input)
893}
894
895/// Whether `input` begins (after cosmetic indentation) with a top-level keyword.
896fn starts_top_level(input: Span<'_>) -> bool {
897    let after = match space0::<_, Problem<'_>>(input) {
898        Ok((rest, _)) => rest,
899        Err(_) => return false,
900    };
901    let word: String = after
902        .fragment()
903        .chars()
904        .take_while(|c| c.is_ascii_uppercase())
905        .collect();
906    is_top_level(&word)
907}