Skip to main content

elenchus_parser/
lib.rs

1//! elenchus-parser — parses the English-like elenchus DSL into an AST.
2//!
3//! Style mirrors `vsm-parser`: zero-copy over `&str`, `nom` + `nom_locate`
4//! for line/column tracking, and human-friendly syntax diagnostics. Syntax is
5//! line/keyword-oriented (not S-expressions) so small models cannot trip on
6//! parentheses or indentation.
7//!
8//! Grammar (see docs/SPEC.md, "Grammar (EBNF)"):
9//! - statements are newline-terminated; indentation is cosmetic, not significant;
10//! - keywords are ALWAYS CAPS (ASCII); identifiers are content (case-sensitive,
11//!   verbatim, any-script letters — e.g. `условие`, `名前`);
12//! - block boundaries (PREMISE/RULE bodies) are found by keywords, never by indent.
13//!
14//! On error, [`parse`] returns [`Diagnostics`]: *every* syntax error from one
15//! pass, each rendered as a caret block with the keyword's correct syntax (see
16//! [`diag`] and [`keywords`]).
17//!
18//! The crate is split into focused modules — [`ast`] (the tree), [`keywords`]
19//! (the single keyword table: spellings, roles, syntax cards), [`diag`] (error
20//! rendering), and `grammar` (the nom parser + recovering driver) — re-exported
21//! here as a flat public surface.
22//!
23//! # Example
24//!
25//! ```
26//! use elenchus_parser::{Statement, parse};
27//!
28//! // One statement per line; the result is a flat list of `Statement`s.
29//! let program = parse("FACT socrates is human\nCHECK socrates\n").unwrap();
30//! assert_eq!(program.statements.len(), 2);
31//! assert!(matches!(program.statements[0], Statement::Fact(_)));
32//! ```
33#![no_std]
34// Every public item is documented; CI (`clippy -D warnings`) keeps it that way.
35#![warn(missing_docs)]
36
37extern crate alloc;
38
39pub mod ast;
40pub mod diag;
41mod grammar;
42pub mod keywords;
43
44pub use ast::{Atom, Body, Conn, ListOp, Literal, Located, Program, Span, Statement};
45pub use diag::{Diagnostic, Diagnostics};
46pub use grammar::parse;
47pub use keywords::{Card, KEYWORDS, Keyword, card_for, is_reserved, kw};
48
49#[cfg(test)]
50mod tests {
51    use super::*;
52    use alloc::format;
53
54    fn prog(src: &str) -> Program<'_> {
55        parse(src).expect("should parse")
56    }
57
58    /// `(subject, predicate, object?)` of one atom, borrowed from the source.
59    type AtomShape<'a> = (&'a str, &'a str, Option<&'a str>);
60    /// A list premise flattened to `(operator, its atoms)`.
61    type ListShape<'a> = (ListOp, Vec<AtomShape<'a>>);
62
63    use alloc::vec::Vec;
64
65    /// Atom data flattened to owned tuples — span-independent, for structural
66    /// comparison (spans differ by offset, which is exactly what "cosmetic" means).
67    fn atom_shapes<'a>(p: &Program<'a>) -> Vec<ListShape<'a>> {
68        p.statements
69            .iter()
70            .filter_map(|s| match s {
71                Statement::Premise {
72                    body: Body::List { op, atoms },
73                    ..
74                } => Some((
75                    *op,
76                    atoms
77                        .iter()
78                        .map(|a| (a.data.subject, a.data.predicate, a.data.object))
79                        .collect(),
80                )),
81                _ => None,
82            })
83            .collect()
84    }
85
86    #[test]
87    fn parses_fact_and_negation() {
88        let p = prog(
89            r#"
90        FACT Creature_A has flying
91        NOT Creature_A has cold_blood
92        "#,
93        );
94        assert_eq!(p.statements.len(), 2);
95        match &p.statements[0] {
96            Statement::Fact(a) => {
97                assert_eq!(a.data.subject, "Creature_A");
98                assert_eq!(a.data.predicate, "has");
99                assert_eq!(a.data.object, Some("flying"));
100            }
101            other => panic!("expected fact, got {:?}", other),
102        }
103        match &p.statements[1] {
104            Statement::Negation(a) => {
105                assert_eq!(a.data.object, Some("cold_blood"));
106            }
107            other => panic!("expected negation, got {:?}", other),
108        }
109    }
110
111    #[test]
112    fn fact_without_object() {
113        let p = prog("FACT Motor over_100\n");
114        match &p.statements[0] {
115            Statement::Fact(a) => {
116                assert_eq!(a.data.subject, "Motor");
117                assert_eq!(a.data.predicate, "over_100");
118                assert_eq!(a.data.object, None);
119            }
120            other => panic!("expected fact, got {:?}", other),
121        }
122    }
123
124    #[test]
125    fn parses_assume_positive_and_negated() {
126        let p = prog(
127            r#"
128        ASSUME rel in_prod
129        ASSUME NOT rel has_rollback
130        "#,
131        );
132        assert_eq!(p.statements.len(), 2);
133        match &p.statements[0] {
134            Statement::Assume(l) => {
135                assert!(!l.data.negated);
136                assert_eq!(l.data.atom.subject, "rel");
137                assert_eq!(l.data.atom.predicate, "in_prod");
138                assert_eq!(l.data.atom.object, None);
139            }
140            other => panic!("expected assume, got {:?}", other),
141        }
142        match &p.statements[1] {
143            Statement::Assume(l) => {
144                assert!(l.data.negated);
145                assert_eq!(l.data.atom.predicate, "has_rollback");
146            }
147            other => panic!("expected negated assume, got {:?}", other),
148        }
149    }
150
151    #[test]
152    fn assume_is_a_reserved_word() {
153        assert!(parse("FACT ASSUME has x\n").is_err());
154    }
155
156    #[test]
157    fn parses_import() {
158        let p = prog("IMPORT \"physics.vrf\"\n");
159        match &p.statements[0] {
160            Statement::Import { path, alias } => {
161                assert_eq!(path.data, "physics.vrf");
162                assert!(alias.is_none());
163            }
164            other => panic!("expected import, got {:?}", other),
165        }
166    }
167
168    #[test]
169    fn parses_import_with_alias() {
170        let p = prog("IMPORT \"physics.vrf\" AS phys\n");
171        match &p.statements[0] {
172            Statement::Import { path, alias } => {
173                assert_eq!(path.data, "physics.vrf");
174                assert_eq!(alias.as_ref().unwrap().data, "phys");
175            }
176            other => panic!("expected import, got {:?}", other),
177        }
178    }
179
180    #[test]
181    fn parses_domain_declaration() {
182        let p = prog("DOMAIN physics\n");
183        match &p.statements[0] {
184            Statement::Domain(name) => assert_eq!(name.data, "physics"),
185            other => panic!("expected domain, got {:?}", other),
186        }
187    }
188
189    #[test]
190    fn parses_domain_qualified_atom() {
191        // `physics.Motor over_200` → domain prefix split from the subject.
192        let p = prog("FACT physics.Motor over_200\n");
193        match &p.statements[0] {
194            Statement::Fact(a) => {
195                assert_eq!(a.data.domain, Some("physics"));
196                assert_eq!(a.data.subject, "Motor");
197                assert_eq!(a.data.predicate, "over_200");
198            }
199            other => panic!("expected fact, got {:?}", other),
200        }
201    }
202
203    #[test]
204    fn bare_atom_has_no_domain() {
205        let p = prog("FACT engine has_fuel\n");
206        match &p.statements[0] {
207            Statement::Fact(a) => {
208                assert_eq!(a.data.domain, None);
209                assert_eq!(a.data.subject, "engine");
210            }
211            other => panic!("expected fact, got {:?}", other),
212        }
213    }
214
215    #[test]
216    fn domain_is_a_reserved_word() {
217        assert!(parse("FACT DOMAIN has x\n").is_err());
218        assert!(parse("FACT AS has x\n").is_err());
219    }
220
221    #[test]
222    fn parses_exclusive_premise() {
223        let src = r#"
224        PREMISE fly_xor_swim:
225            EXCLUSIVE
226                Creature_A has flying
227                Creature_A has swimming
228        "#;
229        let p = prog(src);
230        match &p.statements[0] {
231            Statement::Premise { name, body } => {
232                assert_eq!(name.data, "fly_xor_swim");
233                match body {
234                    Body::List { op, atoms } => {
235                        assert_eq!(*op, ListOp::Exclusive);
236                        assert_eq!(atoms.len(), 2);
237                        assert_eq!(atoms[1].data.object, Some("swimming"));
238                    }
239                    other => panic!("expected list body, got {:?}", other),
240                }
241            }
242            other => panic!("expected premise, got {:?}", other),
243        }
244    }
245
246    #[test]
247    fn parses_implication_premise_with_and() {
248        let src = r#"
249        PREMISE wings_need_bone:
250            WHEN Creature_A has flying
251            THEN Creature_A has wing
252            AND  Creature_A has bone
253        "#;
254        let p = prog(src);
255        match &p.statements[0] {
256            Statement::Premise {
257                body:
258                    Body::Impl {
259                        antecedent,
260                        consequent,
261                        ..
262                    },
263                ..
264            } => {
265                assert_eq!(antecedent.len(), 1);
266                assert_eq!(antecedent[0].data.atom.object, Some("flying"));
267                assert_eq!(consequent.len(), 2);
268                assert_eq!(consequent[0].data.atom.object, Some("wing"));
269                assert_eq!(consequent[1].data.atom.object, Some("bone"));
270            }
271            other => panic!("expected impl premise, got {:?}", other),
272        }
273    }
274
275    #[test]
276    fn antecedent_and_goes_before_then() {
277        let src = r#"
278        PREMISE deploy:
279            WHEN s tested
280            AND s reviewed
281            THEN s can_deploy
282        "#;
283        let p = prog(src);
284        match &p.statements[0] {
285            Statement::Premise {
286                body:
287                    Body::Impl {
288                        antecedent,
289                        consequent,
290                        ..
291                    },
292                ..
293            } => {
294                assert_eq!(antecedent.len(), 2);
295                assert_eq!(consequent.len(), 1);
296            }
297            other => panic!("unexpected: {:?}", other),
298        }
299    }
300
301    #[test]
302    fn when_or_sets_disjunctive_antecedent() {
303        let src = r#"
304        PREMISE p:
305            WHEN x a
306            OR x b
307            THEN x c
308        "#;
309        match &prog(src).statements[0] {
310            Statement::Premise {
311                body:
312                    Body::Impl {
313                        antecedent,
314                        ante_conn,
315                        consequent,
316                        cons_conn,
317                    },
318                ..
319            } => {
320                assert_eq!(antecedent.len(), 2);
321                assert_eq!(*ante_conn, Conn::Or);
322                assert_eq!(consequent.len(), 1);
323                assert_eq!(*cons_conn, Conn::And); // single consequent → AND
324            }
325            other => panic!("expected impl premise, got {:?}", other),
326        }
327    }
328
329    #[test]
330    fn then_or_sets_disjunctive_consequent() {
331        let src = r#"
332        PREMISE p:
333            WHEN x a
334            THEN x b
335            OR x c
336        "#;
337        match &prog(src).statements[0] {
338            Statement::Premise {
339                body:
340                    Body::Impl {
341                        consequent,
342                        cons_conn,
343                        ..
344                    },
345                ..
346            } => {
347                assert_eq!(consequent.len(), 2);
348                assert_eq!(*cons_conn, Conn::Or);
349            }
350            other => panic!("expected impl premise, got {:?}", other),
351        }
352    }
353
354    #[test]
355    fn mixing_and_or_in_one_group_is_an_error() {
356        let mixed_when = r#"
357        PREMISE p:
358            WHEN x a
359            AND x b
360            OR x c
361            THEN x d
362        "#;
363        let mixed_then = r#"
364        PREMISE p:
365            WHEN x a
366            THEN x b
367            AND x c
368            OR x d
369        "#;
370        assert!(parse(mixed_when).is_err());
371        assert!(parse(mixed_then).is_err());
372    }
373
374    #[test]
375    fn or_is_a_reserved_word() {
376        assert!(parse("FACT OR has x\n").is_err());
377    }
378
379    #[test]
380    fn parses_negated_literal_in_rule() {
381        let src = r#"
382        RULE pick_slow:
383            WHEN NOT Motor over_100
384            THEN Motor uses slow_path
385        "#;
386        let p = prog(src);
387        match &p.statements[0] {
388            Statement::Rule {
389                body: Body::Impl { antecedent, .. },
390                ..
391            } => {
392                assert!(antecedent[0].data.negated);
393                assert_eq!(antecedent[0].data.atom.predicate, "over_100");
394            }
395            other => panic!("expected rule, got {:?}", other),
396        }
397    }
398
399    #[test]
400    fn parses_check_variants() {
401        let p = prog("CHECK Creature_A BIDIRECTIONAL\n");
402        match &p.statements[0] {
403            Statement::Check {
404                subject,
405                bidirectional,
406            } => {
407                assert_eq!(subject.as_ref().unwrap().data, "Creature_A");
408                assert!(bidirectional);
409            }
410            other => panic!("expected check, got {:?}", other),
411        }
412
413        let p = prog("CHECK\n");
414        match &p.statements[0] {
415            Statement::Check {
416                subject,
417                bidirectional,
418            } => {
419                assert!(subject.is_none());
420                assert!(!bidirectional);
421            }
422            other => panic!("expected check, got {:?}", other),
423        }
424    }
425
426    #[test]
427    fn comments_and_blanks_are_ignored() {
428        let src = "// header\n\nFACT a b   // trailing comment\n\n// tail\n";
429        let p = prog(src);
430        assert_eq!(p.statements.len(), 1);
431    }
432
433    #[test]
434    fn indentation_is_cosmetic() {
435        let flat = r#"
436        PREMISE x:
437        EXCLUSIVE
438        a b
439        a c
440        "#;
441        let indented = r#"
442        PREMISE x:
443                EXCLUSIVE
444          a b
445                    a c
446        "#;
447        // Spans differ by offset (cosmetic); the parsed structure must be identical.
448        assert_eq!(atom_shapes(&prog(flat)), atom_shapes(&prog(indented)));
449    }
450
451    #[test]
452    fn top_level_statements_may_be_indented() {
453        // Leading indentation on the FACT/PREMISE/CHECK lines themselves is also
454        // cosmetic (so a whole program can be pasted indented inside a here-doc).
455        let flat = r#"
456        FACT x a
457        NOT x b
458        CHECK x
459        "#;
460        let indented = r#"
461            FACT x a
462                NOT x b
463            CHECK x
464        "#;
465        assert_eq!(atom_shapes(&prog(flat)), atom_shapes(&prog(indented)));
466        assert_eq!(prog(indented).statements.len(), 3);
467    }
468
469    #[test]
470    fn full_creature_example_parses() {
471        let src = include_str!("../../../docs/examples/creature.vrf");
472        let p = prog(src);
473        // 1 DOMAIN + 2 FACT + 3 PREMISE + 1 RULE + 1 CHECK = 8
474        assert_eq!(p.statements.len(), 8);
475    }
476
477    #[test]
478    fn import_demo_example_parses() {
479        let src = include_str!("../../../docs/examples/import-demo.vrf");
480        let p = prog(src);
481        assert!(matches!(p.statements[0], Statement::Domain(_)));
482        assert!(matches!(p.statements[1], Statement::Import { .. }));
483    }
484
485    #[test]
486    fn unicode_identifiers_any_script() {
487        // Cyrillic subject/predicate/object, mixed with `_` and digits (not first).
488        let p = prog(
489            r#"
490        FACT кот пушистый2
491        NOT собака has крылья
492        "#,
493        );
494        match &p.statements[0] {
495            Statement::Fact(a) => {
496                assert_eq!(a.data.subject, "кот");
497                assert_eq!(a.data.predicate, "пушистый2");
498                assert_eq!(a.data.object, None);
499            }
500            other => panic!("expected fact, got {:?}", other),
501        }
502        match &p.statements[1] {
503            Statement::Negation(a) => {
504                assert_eq!(a.data.subject, "собака");
505                assert_eq!(a.data.object, Some("крылья"));
506            }
507            other => panic!("expected negation, got {:?}", other),
508        }
509    }
510
511    #[test]
512    fn unicode_premise_name_and_body() {
513        let src = r#"
514        PREMISE правило_лая:
515            WHEN собака has хвост
516            THEN собака умеет_лаять
517        "#;
518        match &prog(src).statements[0] {
519            Statement::Premise { name, body } => {
520                assert_eq!(name.data, "правило_лая");
521                match body {
522                    Body::Impl {
523                        antecedent,
524                        consequent,
525                        ..
526                    } => {
527                        assert_eq!(antecedent[0].data.atom.subject, "собака");
528                        assert_eq!(consequent[0].data.atom.subject, "собака");
529                        assert_eq!(consequent[0].data.atom.predicate, "умеет_лаять");
530                    }
531                    other => panic!("expected impl body, got {:?}", other),
532                }
533            }
534            other => panic!("expected premise, got {:?}", other),
535        }
536    }
537
538    #[test]
539    fn identifier_cannot_start_with_digit() {
540        // `2cats` is not a valid subject — first char must be a letter.
541        assert!(parse("FACT 2cats has fur\n").is_err());
542    }
543
544    #[test]
545    fn punctuation_is_rejected_in_identifier() {
546        // `!` and other symbols are not identifier characters.
547        assert!(parse("FACT cat! has fur\n").is_err());
548    }
549
550    #[test]
551    fn reserved_word_cannot_be_identifier() {
552        // `WHEN` as a subject is illegal.
553        assert!(parse("FACT WHEN has x\n").is_err());
554    }
555
556    #[test]
557    fn pretty_error_points_at_offending_line() {
558        let src = r#"FACT a b
559!garbage here
560FACT c d
561"#;
562        let err = parse(src).expect_err("should fail");
563        let shown = format!("{}", err);
564        // The new diagnostic format: a RESULT header, the line number, the
565        // verbatim offending line, and a caret pointing at it.
566        assert!(shown.contains("RESULT: 1 syntax error"));
567        assert!(shown.contains("line 2"));
568        assert!(shown.contains("!garbage here"));
569        assert!(shown.contains('^'));
570    }
571
572    #[test]
573    fn collects_every_error_in_one_pass() {
574        // Three broken top-level lines among valid ones → exactly three errors,
575        // no cascade from recovery.
576        let src = "FACT lonely\nFACT a b\nNOT also_lonely\nCHECK\nIMPORT nothx\n";
577        let diags = parse(src).expect_err("should fail");
578        assert_eq!(diags.len(), 3);
579    }
580
581    #[test]
582    fn crlf_line_endings() {
583        let p = prog(
584            r#"
585        FACT a b
586        CHECK a
587        "#,
588        );
589        assert_eq!(p.statements.len(), 2);
590    }
591
592    #[test]
593    fn tabs_as_indentation() {
594        let p = prog(
595            r#"
596        PREMISE e:
597	EXCLUSIVE
598		x a
599		x b
600        "#,
601        );
602        assert!(matches!(
603            p.statements[0],
604            Statement::Premise {
605                body: Body::List {
606                    op: ListOp::Exclusive,
607                    ..
608                },
609                ..
610            }
611        ));
612    }
613
614    #[test]
615    fn parses_all_list_ops() {
616        for (kw, want) in [
617            ("EXCLUSIVE", ListOp::Exclusive),
618            ("FORBIDS", ListOp::Forbids),
619            ("ONEOF", ListOp::OneOf),
620            ("ATLEAST", ListOp::AtLeast),
621        ] {
622            let src = alloc::format!("PREMISE a:\n    {kw}\n        x a\n        x b\n");
623            match &prog(&src).statements[0] {
624                Statement::Premise {
625                    body: Body::List { op, .. },
626                    ..
627                } => assert_eq!(*op, want),
628                other => panic!("{kw}: unexpected {other:?}"),
629            }
630        }
631    }
632
633    #[test]
634    fn check_bidirectional_without_subject() {
635        match &prog("CHECK BIDIRECTIONAL\n").statements[0] {
636            Statement::Check {
637                subject,
638                bidirectional,
639            } => {
640                assert!(subject.is_none());
641                assert!(bidirectional);
642            }
643            other => panic!("unexpected {other:?}"),
644        }
645    }
646
647    #[test]
648    fn empty_and_comment_only_input_yield_no_statements() {
649        assert_eq!(prog("").statements.len(), 0);
650        assert_eq!(prog("// just a comment\n\n   \n").statements.len(), 0);
651    }
652
653    #[test]
654    fn negation_with_object() {
655        match &prog("NOT Creature_A has wing\n").statements[0] {
656            Statement::Negation(a) => {
657                assert_eq!(a.data.subject, "Creature_A");
658                assert_eq!(a.data.object, Some("wing"));
659            }
660            other => panic!("unexpected {other:?}"),
661        }
662    }
663
664    #[test]
665    fn negated_consequent_then_not() {
666        let src = r#"
667        PREMISE a:
668            WHEN x on
669            THEN NOT x off
670        "#;
671        match &prog(src).statements[0] {
672            Statement::Premise {
673                body: Body::Impl { consequent, .. },
674                ..
675            } => {
676                assert!(consequent[0].data.negated);
677                assert_eq!(consequent[0].data.atom.predicate, "off");
678            }
679            other => panic!("unexpected {other:?}"),
680        }
681    }
682
683    #[test]
684    fn multiple_imports_then_facts() {
685        let p = prog(
686            r#"
687        IMPORT "a.vrf"
688        IMPORT "b.vrf"
689        FACT x y
690        "#,
691        );
692        assert!(matches!(p.statements[0], Statement::Import { .. }));
693        assert!(matches!(p.statements[1], Statement::Import { .. }));
694        assert!(matches!(p.statements[2], Statement::Fact(_)));
695    }
696
697    #[test]
698    fn trailing_comment_without_final_newline() {
699        let p = prog("FACT a b\n// trailing, no newline");
700        assert_eq!(p.statements.len(), 1);
701    }
702}