Skip to main content

elenchus_parser/
grammar.rs

1//! The nom grammar and the recovering top-level driver.
2//!
3//! The per-statement parsers (`atom`, `literal`, `stmt_*`, `impl_body`,
4//! `list_body`) are ordinary `nom` combinators over a located [`Span`]; once a
5//! statement commits on its leading keyword, failures are [`promote`]d to a
6//! `Failure` carrying a precise message ([`Problem`]). The driver [`parse`] does
7//! not stop at the first failure: it records a [`Diagnostic`], resynchronises to
8//! the next top-level keyword line, and keeps going, so one run reports *every*
9//! syntax error.
10
11use alloc::string::{String, ToString};
12use alloc::vec;
13use alloc::vec::Vec;
14
15use nom::{
16    IResult, Parser,
17    branch::alt,
18    bytes::complete::{tag, take_while},
19    character::complete::{char, line_ending, satisfy, space0, space1},
20    combinator::{eof, opt, recognize, value},
21    multi::many0,
22    sequence::{delimited, preceded, terminated},
23};
24
25use crate::ast::{
26    Atom, Body, CloseKind, Conn, ListOp, Literal, Located, Program, Quant, Span, Statement,
27};
28use crate::diag::{Diagnostic, Diagnostics};
29use crate::keywords::{is_reserved, is_top_level, keyword_in, kw};
30
31// --- Parser primitives -----------------------------------------------------
32
33/// Internal nom error carrying a human message and the precise failure span.
34/// Recoverable failures are `nom::Err::Error`; once a statement has committed
35/// (its leading keyword is recognized), failures are promoted to
36/// `nom::Err::Failure` so the error points at the *real* problem with a specific
37/// message instead of backtracking to a generic "expected a statement".
38#[derive(Debug, Clone)]
39struct Problem<'a> {
40    input: Span<'a>,
41    message: String,
42}
43
44impl<'a> nom::error::ParseError<Span<'a>> for Problem<'a> {
45    fn from_error_kind(input: Span<'a>, _: nom::error::ErrorKind) -> Self {
46        Problem {
47            input,
48            message: String::from("unexpected token"),
49        }
50    }
51    fn append(_: Span<'a>, _: nom::error::ErrorKind, other: Self) -> Self {
52        other
53    }
54}
55
56/// nom result specialized to our [`Problem`] error and located `Span` input.
57type PResult<'a, T> = IResult<Span<'a>, T, Problem<'a>>;
58
59/// Turn a recoverable `Error` into a committed `Failure` at `at` with `msg`.
60/// Already-`Failure` (a deeper, more specific message) and `Ok` pass through.
61fn promote<'a, T>(r: PResult<'a, T>, at: Span<'a>, msg: &str) -> PResult<'a, T> {
62    match r {
63        Err(nom::Err::Error(_)) => Err(nom::Err::Failure(Problem {
64            input: at,
65            message: String::from(msg),
66        })),
67        other => other,
68    }
69}
70
71/// A plain recoverable `Error` at `input` (lets an enclosing `alt` backtrack).
72fn perr<'a, T>(input: Span<'a>) -> PResult<'a, T> {
73    Err(nom::Err::Error(Problem {
74        input,
75        message: String::from("unexpected token"),
76    }))
77}
78
79/// Characters allowed *after* the first in an identifier. Letters and digits of
80/// *any* script are accepted (Unicode `is_alphanumeric`), so `условие` or `名前`
81/// are valid; `_` joins multi-word names. `.` is **not** an identifier character:
82/// it is the domain separator (`physics.engine`), so use `_` for compound
83/// subjects (`Creature_A`). Punctuation and other symbols are rejected.
84fn is_ident_char(c: char) -> bool {
85    c.is_alphanumeric() || c == '_'
86}
87
88/// A bare identifier (does not reject reserved words). The first character must
89/// be a letter of any script (`is_alphabetic`) — never a digit, `_`, `.`, or
90/// punctuation — so identifiers stay distinct from numbers and operators.
91fn raw_identifier<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
92    recognize((satisfy(|c| c.is_alphabetic()), take_while(is_ident_char))).parse(input)
93}
94
95/// An identifier that is not a reserved keyword.
96fn identifier<'a>(input: Span<'a>) -> PResult<'a, Located<'a, &'a str>> {
97    let start = input;
98    let (rest, sp) = raw_identifier(input)?;
99    if is_reserved(sp.fragment()) {
100        return perr(start);
101    }
102    Ok((
103        rest,
104        Located {
105            data: *sp.fragment(),
106            span: start,
107        },
108    ))
109}
110
111/// A line comment `// ... ` up to (but not including) the line ending.
112fn comment<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
113    recognize((tag("//"), take_while(|c| c != '\n' && c != '\r'))).parse(input)
114}
115
116/// End of a statement line: trailing spaces, an optional trailing comment,
117/// then a line ending or EOF.
118fn eol<'a>(input: Span<'a>) -> PResult<'a, ()> {
119    value((), (space0, opt(comment), alt((line_ending, eof)))).parse(input)
120}
121
122/// A blank line or a full-line comment (must consume a line ending to progress).
123fn noise_line<'a>(input: Span<'a>) -> PResult<'a, ()> {
124    value((), (space0, opt(comment), line_ending)).parse(input)
125}
126
127/// Skip any number of blank / comment lines between statements.
128fn skip_noise<'a>(input: Span<'a>) -> PResult<'a, ()> {
129    value((), many0(noise_line)).parse(input)
130}
131
132// --- Atoms & literals ------------------------------------------------------
133
134/// `[<domain>.]<subject> <predicate> [<object>]` — an atom, optionally qualified
135/// by a `domain.` prefix on the subject, then two or three space-separated
136/// identifiers. The domain is recognised only when an identifier is immediately
137/// followed by `.` (no space), so a bare `engine has_fuel` keeps `engine` as the
138/// subject.
139fn atom<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
140    let start = input;
141    let (input, domain) = opt(terminated(identifier, char('.'))).parse(input)?;
142    let (input, subject) = identifier(input)?;
143    let (input, _) = space1(input)?;
144    let (input, predicate) = identifier(input)?;
145    let (input, object) = opt(preceded(space1, identifier)).parse(input)?;
146    Ok((
147        input,
148        Located {
149            data: Atom {
150                domain: domain.map(|d| d.data),
151                subject: subject.data,
152                predicate: predicate.data,
153                object: object.map(|o| o.data),
154            },
155            span: start,
156        },
157    ))
158}
159
160/// An optionally `NOT`-prefixed [`atom`] — a literal inside a `WHEN`/`THEN` body.
161fn literal<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Literal<'a>>> {
162    let start = input;
163    let (input, neg) = opt(terminated(tag(kw::NOT), space1)).parse(input)?;
164    let (input, a) = atom(input)?;
165    Ok((
166        input,
167        Located {
168            data: Literal {
169                negated: neg.is_some(),
170                atom: a.data,
171            },
172            span: start,
173        },
174    ))
175}
176
177/// An atom on its own (possibly indented) line: used inside list bodies.
178fn atom_line<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
179    let (input, _) = space0(input)?;
180    let (input, a) = atom(input)?;
181    let (input, _) = eol(input)?;
182    Ok((input, a))
183}
184
185/// A single identifier on its own (possibly indented) line: a `SET` element. A
186/// reserved word or a non-identifier line yields a recoverable `Error` so `many0`
187/// stops cleanly at the next statement.
188fn element_line<'a>(input: Span<'a>) -> PResult<'a, Located<'a, &'a str>> {
189    let (input, _) = space0(input)?;
190    let (input, id) = identifier(input)?;
191    let (input, _) = eol(input)?;
192    Ok((input, id))
193}
194
195// --- Bodies ----------------------------------------------------------------
196
197/// One of the list-constraint keywords (`EXCLUSIVE`/`FORBIDS`/`ONEOF`/`ATLEAST`).
198fn list_op<'a>(input: Span<'a>) -> PResult<'a, ListOp> {
199    alt((
200        value(ListOp::Exclusive, tag(kw::EXCLUSIVE)),
201        value(ListOp::Forbids, tag(kw::FORBIDS)),
202        value(ListOp::OneOf, tag(kw::ONEOF)),
203        value(ListOp::AtLeast, tag(kw::ATLEAST)),
204    ))
205    .parse(input)
206}
207
208/// A list-constraint body: a list operator on its own line, then one atom per
209/// line (at least two).
210///
211/// Commit strategy: the leading `list_op` failing stays a recoverable `Error`
212/// so the `PREMISE` `alt` can fall through and try [`impl_body`] instead. *Once*
213/// the operator matched we are committed, so every subsequent failure is
214/// [`promote`]d to a `Failure` with a specific message — no backtracking to a
215/// generic "expected a statement".
216/// The diagnostic for a list body with fewer than the required two atoms — one
217/// spelling, used for both the missing-first and missing-second slot.
218const LIST_NEEDS_TWO_ATOMS: &str = "a list premise needs at least two atoms";
219
220fn list_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
221    let (input, _) = space0(input)?;
222    // list_op failing stays Error so the PREMISE alt can try impl_body.
223    let (input, op) = list_op(input)?;
224    // Past here we are committed to a list body.
225    let (input, _) = promote(
226        eol(input),
227        input,
228        "expected a newline after the list operator",
229    )?;
230    let at = input;
231    let (input, first) = promote(atom_line(input), at, LIST_NEEDS_TWO_ATOMS)?;
232    let at = input;
233    let (input, second) = promote(atom_line(input), at, LIST_NEEDS_TWO_ATOMS)?;
234    let (input, rest) = many0(atom_line).parse(input)?;
235
236    let mut atoms = vec![first, second];
237    atoms.extend(rest);
238    Ok((input, Body::List { op, atoms }))
239}
240
241/// A continuation `AND <literal>` / `OR <literal>` line inside a `WHEN`/`THEN`
242/// block. Returns `(Conn, literal)`. A line that is neither `AND` nor `OR` yields
243/// a recoverable `Error` so `many0` stops cleanly (e.g. at `THEN`/EOF).
244fn cont_line<'a>(input: Span<'a>) -> PResult<'a, (Conn, Located<'a, Literal<'a>>)> {
245    let (input, _) = space0(input)?;
246    // Not an AND/OR line → Error so many0 stops cleanly.
247    let (input, conn) =
248        alt((value(Conn::And, tag(kw::AND)), value(Conn::Or, tag(kw::OR)))).parse(input)?;
249    let (input, _) = space1(input)?;
250    let at = input;
251    let (input, lit) = promote(
252        literal(input),
253        at,
254        "AND/OR expects a literal: [NOT] <Subject> <predicate> [<object>]",
255    )?;
256    let (input, _) = promote(
257        eol(input),
258        input,
259        "unexpected text after the AND/OR literal",
260    )?;
261    Ok((input, (conn, lit)))
262}
263
264/// Reduce a group's continuation lines to a single [`Conn`], rejecting a mix of
265/// `AND` and `OR` in one group (point the error at the first line that switches).
266fn group_conn<'a>(conts: &[(Conn, Located<'a, Literal<'a>>)]) -> Result<Conn, Span<'a>> {
267    let mut seen: Option<Conn> = None;
268    for (conn, lit) in conts {
269        match seen {
270            None => seen = Some(*conn),
271            Some(s) if s != *conn => return Err(lit.span),
272            _ => {}
273        }
274    }
275    Ok(seen.unwrap_or(Conn::And))
276}
277
278/// Fail with a committed message at `at` (for in-body semantic checks).
279fn fail_at<'a, T>(at: Span<'a>, msg: &str) -> PResult<'a, T> {
280    Err(nom::Err::Failure(Problem {
281        input: at,
282        message: String::from(msg),
283    }))
284}
285
286/// An implication body: `WHEN <lit> [AND <lit>]* THEN <lit> [AND <lit>]*`.
287///
288/// Like [`list_body`], a missing leading `WHEN` stays a recoverable `Error` (so
289/// the `PREMISE` `alt` can try a list body); after `WHEN` matches we are committed
290/// and use [`promote`] for precise errors. Antecedent and consequent each start
291/// with one mandatory literal followed by zero or more `AND` lines.
292fn impl_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
293    let (input, _) = space0(input)?;
294    // No WHEN → Error so the PREMISE alt can fall through to a list body.
295    let (input, _) = (tag(kw::WHEN), space1).parse(input)?;
296    // Committed to an implication body now.
297    let at = input;
298    let (input, when) = promote(
299        literal(input),
300        at,
301        "WHEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
302    )?;
303    let (input, _) = promote(eol(input), input, "unexpected text after the WHEN literal")?;
304    let (input, ante_rest) = many0(cont_line).parse(input)?;
305    let ante_conn = match group_conn(&ante_rest) {
306        Ok(c) => c,
307        Err(span) => {
308            return fail_at(
309                span,
310                "don't mix AND and OR in one WHEN group — split it into separate premises",
311            );
312        }
313    };
314
315    let (input, _) = space0(input)?;
316    let at = input;
317    let (input, _) = promote(
318        tag(kw::THEN).parse(input),
319        at,
320        "expected THEN to complete the WHEN ... THEN implication",
321    )?;
322    let at = input;
323    let (input, then) = promote(
324        preceded(space1, literal).parse(input),
325        at,
326        "THEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
327    )?;
328    let (input, _) = promote(eol(input), input, "unexpected text after the THEN literal")?;
329    let (input, cons_rest) = many0(cont_line).parse(input)?;
330    let cons_conn = match group_conn(&cons_rest) {
331        Ok(c) => c,
332        Err(span) => {
333            return fail_at(
334                span,
335                "don't mix AND and OR in one THEN group — split it into separate premises",
336            );
337        }
338    };
339
340    let mut antecedent = vec![when];
341    antecedent.extend(ante_rest.into_iter().map(|(_, l)| l));
342    let mut consequent = vec![then];
343    consequent.extend(cons_rest.into_iter().map(|(_, l)| l));
344    Ok((
345        input,
346        Body::Impl {
347            antecedent,
348            ante_conn,
349            consequent,
350            cons_conn,
351        },
352    ))
353}
354
355// --- Statements ------------------------------------------------------------
356
357/// `IMPORT "<path>" [AS <alias>]` — a quoted path, optionally bound to a local
358/// domain alias, on one line.
359fn stmt_import<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
360    let (input, _) = (tag(kw::IMPORT), space1).parse(input)?;
361    let start = input;
362    let (input, path) = promote(
363        delimited(char('"'), take_while(|c| c != '"' && c != '\n'), char('"')).parse(input),
364        start,
365        "IMPORT expects a quoted path, e.g. IMPORT \"physics.vrf\"",
366    )?;
367    // Optional `AS <alias>`: a local name for the imported domain.
368    let (input, alias) = opt(preceded((space1, tag(kw::AS), space1), identifier)).parse(input)?;
369    let (input, _) = promote(
370        eol(input),
371        input,
372        "unexpected text after the IMPORT path (did you mean AS <alias>?)",
373    )?;
374    Ok((
375        input,
376        Statement::Import {
377            path: Located {
378                data: *path.fragment(),
379                span: start,
380            },
381            alias,
382        },
383    ))
384}
385
386/// `DOMAIN <name>` — declare this file's domain on one line.
387fn stmt_domain<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
388    let (input, _) = (tag(kw::DOMAIN), space1).parse(input)?;
389    let at = input;
390    let (input, name) = promote(
391        identifier(input),
392        at,
393        "DOMAIN expects a name (a lowercase identifier), e.g. DOMAIN physics",
394    )?;
395    let (input, _) = promote(eol(input), input, "unexpected text after the DOMAIN name")?;
396    Ok((input, Statement::Domain(name)))
397}
398
399/// `FACT <atom>` — a TRUE assertion.
400fn stmt_fact<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
401    let (input, _) = (tag(kw::FACT), space1).parse(input)?;
402    let at = input;
403    let (input, a) = promote(
404        atom(input),
405        at,
406        "FACT expects an atom: <Subject> <predicate> [<object>]",
407    )?;
408    let (input, _) = promote(eol(input), input, "unexpected text after the FACT atom")?;
409    Ok((input, Statement::Fact(a)))
410}
411
412/// `ASSUME [NOT] <atom>` — a soft (retractable) assertion. Accepts a leading
413/// `NOT` (like a `WHEN`/`THEN` literal), so `ASSUME NOT x a` is FALSE-by-default.
414fn stmt_assume<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
415    let (input, _) = (tag(kw::ASSUME), space1).parse(input)?;
416    let at = input;
417    let (input, lit) = promote(
418        literal(input),
419        at,
420        "ASSUME expects an atom: [NOT] <Subject> <predicate> [<object>]",
421    )?;
422    let (input, _) = promote(eol(input), input, "unexpected text after the ASSUME atom")?;
423    Ok((input, Statement::Assume(lit)))
424}
425
426/// `NOT <atom>` — a FALSE assertion. Tried last among statements so a body-level
427/// `NOT` literal is never mistaken for a top-level negation.
428fn stmt_negation<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
429    let (input, _) = (tag(kw::NOT), space1).parse(input)?;
430    let at = input;
431    let (input, a) = promote(
432        atom(input),
433        at,
434        "NOT expects an atom: <Subject> <predicate> [<object>]",
435    )?;
436    let (input, _) = promote(eol(input), input, "unexpected text after the NOT atom")?;
437    Ok((input, Statement::Negation(a)))
438}
439
440/// `CHECK [<subject>] [BIDIRECTIONAL]` — both modifiers optional.
441fn stmt_check<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
442    let (input, _) = tag(kw::CHECK).parse(input)?;
443    let (input, subject) = opt(preceded(space1, identifier)).parse(input)?;
444    let (input, bidir) = opt(preceded(space1, tag(kw::BIDIRECTIONAL))).parse(input)?;
445    let (input, _) = eol(input)?;
446    Ok((
447        input,
448        Statement::Check {
449            subject,
450            bidirectional: bidir.is_some(),
451        },
452    ))
453}
454
455/// `SET <name>` then one identifier per line (at least one) — declare a finite
456/// set to quantify a `PREMISE`/`RULE` over via `FOR EACH <binder> IN <name>`.
457fn stmt_set<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
458    let (input, _) = (tag(kw::SET), space1).parse(input)?;
459    let at = input;
460    let (input, name) = promote(
461        identifier(input),
462        at,
463        "SET expects a name (a lowercase identifier), e.g. SET tasks",
464    )?;
465    let (input, _) = promote(eol(input), input, "unexpected text after the SET name")?;
466    let at = input;
467    let (input, first) = promote(
468        element_line(input),
469        at,
470        "a SET needs at least one element — one identifier per line",
471    )?;
472    let (input, rest) = many0(element_line).parse(input)?;
473    let mut elements = vec![first];
474    elements.extend(rest);
475    Ok((input, Statement::Set { name, elements }))
476}
477
478/// `CLOSE <relation> TRANSITIVE` — close a relation's FACT pairs at compile time.
479fn stmt_close<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
480    let (input, _) = (tag(kw::CLOSE), space1).parse(input)?;
481    let at = input;
482    let (input, relation) = promote(
483        identifier(input),
484        at,
485        "CLOSE expects a relation name, e.g. CLOSE depends_on TRANSITIVE",
486    )?;
487    let (input, _) = promote(
488        (space1, tag(kw::TRANSITIVE)).parse(input),
489        input,
490        "CLOSE expects a closure kind: CLOSE <relation> TRANSITIVE",
491    )?;
492    let (input, _) = promote(
493        eol(input),
494        input,
495        "unexpected text after 'CLOSE … TRANSITIVE'",
496    )?;
497    Ok((
498        input,
499        Statement::Close {
500            relation,
501            kind: CloseKind::Transitive,
502        },
503    ))
504}
505
506/// The optional quantifier tail on a `PREMISE`/`RULE` header (between the name
507/// and the `:`). One of two forms:
508///   `FOR EACH <binder> IN <set>`         — over a declared SET, or
509///   `FOR EACH <left> <relation> <right>` — over the declared FACT pairs.
510/// There is exactly one quantifier and no production for a second, so nesting is
511/// unrepresentable.
512fn for_each<'a>(input: Span<'a>) -> PResult<'a, Quant<'a>> {
513    // No FOR → recoverable Error so `opt` yields None and the `:` is parsed next.
514    let (input, _) = tag(kw::FOR).parse(input)?;
515    // Committed: FOR can only begin a quantifier here.
516    let at = input;
517    let (input, _) = promote(
518        (space1, tag(kw::EACH), space1).parse(input),
519        at,
520        "FOR must be followed by EACH: FOR EACH <binder> IN <set>  (or  FOR EACH <a> <relation> <b>)",
521    )?;
522    let at = input;
523    let (input, first) = promote(
524        identifier(input),
525        at,
526        "expected a binder name after FOR EACH",
527    )?;
528    let (input, _) = promote(
529        space1(input),
530        input,
531        "expected `IN <set>` or a relation `<rel> <binder>` after the FOR EACH binder",
532    )?;
533    // `IN <set>` → set quantifier; otherwise `<predicate> <right>` → relation.
534    let after_in: PResult<'a, (Span<'a>, Span<'a>)> = (tag(kw::IN), space1).parse(input);
535    if let Ok((rest, _)) = after_in {
536        let at = rest;
537        let (rest, set) = promote(identifier(rest), at, "expected a set name after IN")?;
538        return Ok((rest, Quant::InSet { binder: first, set }));
539    }
540    let at = input;
541    let (input, predicate) = promote(
542        identifier(input),
543        at,
544        "expected a relation name (FOR EACH <a> <relation> <b>)",
545    )?;
546    let (input, _) = promote(
547        space1(input),
548        input,
549        "expected the second binder after the relation (FOR EACH <a> <relation> <b>)",
550    )?;
551    let at = input;
552    let (input, right) = promote(
553        identifier(input),
554        at,
555        "expected the second binder (FOR EACH <a> <relation> <b>)",
556    )?;
557    Ok((
558        input,
559        Quant::Relation {
560            left: first,
561            predicate,
562            right,
563        },
564    ))
565}
566
567/// `PREMISE <name> [FOR EACH …]: <body>` where the body is a list or an implication.
568/// Parse the `<name> [FOR EACH …]:` header shared by PREMISE and RULE, after the
569/// keyword has been consumed. The structure — the name, the single optional
570/// quantifier, the colon, and the end of line — lives here once; each keyword
571/// passes its own three diagnostics so the wording stays specific.
572fn named_header<'a>(
573    input: Span<'a>,
574    name_msg: &'static str,
575    colon_msg: &'static str,
576    tail_msg: &'static str,
577) -> PResult<'a, (Located<'a, &'a str>, Option<Quant<'a>>)> {
578    let at = input;
579    let (input, name) = promote(identifier(input), at, name_msg)?;
580    let (input, quant) = opt(preceded(space1, for_each)).parse(input)?;
581    let (input, _) = space0(input)?;
582    let (input, _) = promote(char(':').parse(input), input, colon_msg)?;
583    let (input, _) = promote(eol(input), input, tail_msg)?;
584    Ok((input, (name, quant)))
585}
586
587fn stmt_premise<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
588    let (input, _) = (tag(kw::PREMISE), space1).parse(input)?;
589    // Committed to a premise now.
590    let (input, (name, quant)) = named_header(
591        input,
592        "expected a premise name (a lowercase identifier)",
593        "expected ':' after the premise name",
594        "unexpected text after 'PREMISE <name>:'",
595    )?;
596    let at = input;
597    let (input, body) = promote(
598        alt((list_body, impl_body)).parse(input),
599        at,
600        "a premise body must be a list (EXCLUSIVE/FORBIDS/ONEOF/ATLEAST) or WHEN ... THEN",
601    )?;
602    Ok((input, Statement::Premise { name, quant, body }))
603}
604
605/// `RULE <name> [FOR EACH …]: <implication>` — like a premise but the body must
606/// be `WHEN … THEN`.
607fn stmt_rule<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
608    let (input, _) = (tag(kw::RULE), space1).parse(input)?;
609    let (input, (name, quant)) = named_header(
610        input,
611        "expected a rule name (a lowercase identifier)",
612        "expected ':' after the rule name",
613        "unexpected text after 'RULE <name>:'",
614    )?;
615    let at = input;
616    let (input, body) = promote(impl_body(input), at, "a rule body must be WHEN ... THEN")?;
617    Ok((input, Statement::Rule { name, quant, body }))
618}
619
620/// One top-level statement. Order matters: each branch commits on its keyword,
621/// and `stmt_negation` comes last so a leading `NOT` is only read as a top-level
622/// negation when nothing else matched.
623fn statement<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
624    // Leading indentation is cosmetic everywhere, including on the top-level
625    // keyword line — so a whole program may be written indented (e.g. inside a
626    // host's here-doc) and parse identically.
627    let (input, _) = space0(input)?;
628    alt((
629        stmt_domain,
630        stmt_import,
631        stmt_set,
632        stmt_close,
633        stmt_fact,
634        stmt_assume,
635        stmt_premise,
636        stmt_rule,
637        stmt_check,
638        stmt_negation,
639    ))
640    .parse(input)
641}
642
643// --- Recovering driver -----------------------------------------------------
644
645/// Message for a line that begins with no known top-level keyword. The list of
646/// statements is built from [`keywords::top_level_menu`], so it always names
647/// exactly the top-level keywords that exist — no hand-kept copy to drift.
648fn not_a_statement() -> String {
649    alloc::format!(
650        "expected a statement — a line must start with {}",
651        crate::keywords::top_level_menu()
652    )
653}
654
655/// Parse a full `.vrf` source into a [`Program`], collecting *every* syntax error.
656///
657/// On the first failing statement the parser does not give up: it records a
658/// [`Diagnostic`], resynchronises to the next top-level keyword line, and
659/// continues. A clean parse returns the [`Program`]; otherwise it returns all
660/// errors as [`Diagnostics`], whose `Display` renders each as a caret block with
661/// the keyword's correct syntax.
662pub fn parse(src: &str) -> Result<Program<'_>, Diagnostics> {
663    let mut input = Span::new(src);
664    let mut statements = Vec::new();
665    let mut errors: Vec<Diagnostic> = Vec::new();
666
667    loop {
668        if let Ok((rest, _)) = skip_noise(input) {
669            input = rest;
670        }
671        // Only a whitespace / trailing-comment tail remains — a clean end, not a
672        // statement (the `r#"..."#` fixtures end with an indented closing line).
673        if at_end(input.fragment()) {
674            break;
675        }
676        match statement(input) {
677            Ok((rest, stmt)) => {
678                statements.push(stmt);
679                input = rest;
680            }
681            // Committed: a keyword matched but the rest is wrong → precise message.
682            Err(nom::Err::Failure(p)) => {
683                errors.push(make_diag(src, p.input, p.message, false));
684                input = resync(p.input);
685            }
686            // Recoverable: this line started no statement keyword at all.
687            Err(nom::Err::Error(p)) => {
688                errors.push(make_diag(src, p.input, not_a_statement(), true));
689                input = resync(p.input);
690            }
691            // `complete` combinators never return Incomplete; stop defensively.
692            Err(nom::Err::Incomplete(_)) => break,
693        }
694    }
695
696    if errors.is_empty() {
697        Ok(Program { statements })
698    } else {
699        Err(Diagnostics { file: None, errors })
700    }
701}
702
703/// Whether only an ignorable tail remains: pure whitespace, or a trailing
704/// `//` comment with no following newline. `skip_noise` has already consumed any
705/// full blank/comment lines, so anything else is a real statement to parse.
706fn at_end(frag: &str) -> bool {
707    let t = frag.trim_start();
708    t.is_empty() || (t.starts_with("//") && !t.contains('\n'))
709}
710
711/// Build one [`Diagnostic`] from the failure span. `general` marks a
712/// not-tied-to-a-keyword error (shows the menu of top-level forms instead of a
713/// single syntax card).
714fn make_diag(src: &str, at: Span<'_>, message: String, general: bool) -> Diagnostic {
715    let line = at.location_line() as usize;
716    let col = at.get_column();
717    let line_text = src.lines().nth(line.saturating_sub(1)).unwrap_or("");
718    // The card follows the *message* (which names the construct in question),
719    // not the line the parser stalled on — a stall often lands on the *next*
720    // line (e.g. "expected THEN …" points at the CHECK after a bodyless WHEN),
721    // whose leading word would be a misleading card.
722    let keyword = if general { None } else { keyword_in(&message) };
723    Diagnostic {
724        line,
725        col,
726        width: caret_width(line_text, col),
727        message,
728        keyword,
729        general,
730        line_text: line_text.to_string(),
731    }
732}
733
734/// Caret length: from `col` to the last non-whitespace character of the line (at
735/// least one), so the underline covers the offending token / trailing text.
736fn caret_width(line_text: &str, col: usize) -> usize {
737    let start = col.saturating_sub(1);
738    let trimmed_len = line_text.trim_end().chars().count();
739    trimmed_len.saturating_sub(start).max(1)
740}
741
742/// Resynchronise after an error: skip the rest of the offending line, then any
743/// lines until one begins (after cosmetic indentation) with a top-level keyword,
744/// or EOF. This keeps a broken PREMISE body from cascading into spurious errors.
745fn resync(at: Span<'_>) -> Span<'_> {
746    let mut input = consume_line(at);
747    loop {
748        if input.fragment().is_empty() || starts_top_level(input) {
749            return input;
750        }
751        input = consume_line(input);
752    }
753}
754
755/// Consume through the next line ending (or to EOF if none remains). The
756/// `complete` combinators never fail here, so the `unwrap_or` is unreachable.
757fn consume_line(input: Span<'_>) -> Span<'_> {
758    let parsed: PResult<'_, Span<'_>> =
759        recognize((take_while(|c| c != '\n' && c != '\r'), opt(line_ending))).parse(input);
760    parsed.map(|(rest, _)| rest).unwrap_or(input)
761}
762
763/// Whether `input` begins (after cosmetic indentation) with a top-level keyword.
764fn starts_top_level(input: Span<'_>) -> bool {
765    let after = match space0::<_, Problem<'_>>(input) {
766        Ok((rest, _)) => rest,
767        Err(_) => return false,
768    };
769    let word: String = after
770        .fragment()
771        .chars()
772        .take_while(|c| c.is_ascii_uppercase())
773        .collect();
774    is_top_level(&word)
775}