Skip to main content

elenchus_parser/
lib.rs

1//! elenchus-parser — parses the English-like elenchus DSL into an AST.
2//!
3//! Style mirrors `vsm-parser`: zero-copy over `&str`, `nom` + `nom_locate`
4//! for line/column tracking, and human-friendly syntax diagnostics. Syntax is
5//! line/keyword-oriented (not S-expressions) so small models cannot trip on
6//! parentheses or indentation.
7//!
8//! Grammar (see docs/SPEC.md, "Grammar (EBNF)"):
9//! - statements are newline-terminated; indentation is cosmetic, not significant;
10//! - keywords are ALWAYS CAPS (ASCII); identifiers are content (case-sensitive,
11//!   verbatim, any-script letters — e.g. `условие`, `名前`);
12//! - block boundaries (PREMISE/RULE bodies) are found by keywords, never by indent.
13//!
14//! On error, [`parse`] returns [`Diagnostics`]: *every* syntax error from one
15//! pass, each rendered as a caret block with the keyword's correct syntax (see
16//! [`diag`] and [`syntax`]).
17//!
18//! The crate is split into focused modules — [`ast`] (the tree), [`keywords`]
19//! (the single keyword table: spellings, roles, syntax cards), [`diag`] (error
20//! rendering), and `grammar` (the nom parser + recovering driver) — re-exported
21//! here as a flat public surface.
22//!
23//! # Example
24//!
25//! ```
26//! use elenchus_parser::{Statement, parse};
27//!
28//! // One statement per line; the result is a flat list of `Statement`s.
29//! let program = parse("FACT socrates is human\nCHECK socrates\n").unwrap();
30//! assert_eq!(program.statements.len(), 2);
31//! assert!(matches!(program.statements[0], Statement::Fact(_)));
32//! ```
33#![no_std]
34// Every public item is documented; CI (`clippy -D warnings`) keeps it that way.
35#![warn(missing_docs)]
36
37extern crate alloc;
38
39pub mod ast;
40pub mod diag;
41mod grammar;
42pub mod keywords;
43
44pub use ast::{
45    Atom, Body, CloseKind, Conn, ListOp, Literal, Located, Program, Quant, Span, Statement,
46};
47pub use diag::{Diagnostic, Diagnostics};
48pub use grammar::parse;
49pub use keywords::{Card, KEYWORDS, Keyword, card_for, is_reserved, kw};
50
51#[cfg(test)]
52mod tests {
53    use super::*;
54    use alloc::format;
55
56    fn prog(src: &str) -> Program<'_> {
57        parse(src).expect("should parse")
58    }
59
60    /// `(subject, predicate, object?)` of one atom, borrowed from the source.
61    type AtomShape<'a> = (&'a str, &'a str, Option<&'a str>);
62    /// A list premise flattened to `(operator, its atoms)`.
63    type ListShape<'a> = (ListOp, Vec<AtomShape<'a>>);
64
65    use alloc::vec::Vec;
66
67    /// Atom data flattened to owned tuples — span-independent, for structural
68    /// comparison (spans differ by offset, which is exactly what "cosmetic" means).
69    fn atom_shapes<'a>(p: &Program<'a>) -> Vec<ListShape<'a>> {
70        p.statements
71            .iter()
72            .filter_map(|s| match s {
73                Statement::Premise {
74                    body: Body::List { op, atoms },
75                    ..
76                } => Some((
77                    *op,
78                    atoms
79                        .iter()
80                        .map(|a| (a.data.subject, a.data.predicate, a.data.object))
81                        .collect(),
82                )),
83                _ => None,
84            })
85            .collect()
86    }
87
88    #[test]
89    fn parses_fact_and_negation() {
90        let p = prog(
91            r#"
92        FACT Creature_A has flying
93        NOT Creature_A has cold_blood
94        "#,
95        );
96        assert_eq!(p.statements.len(), 2);
97        match &p.statements[0] {
98            Statement::Fact(a) => {
99                assert_eq!(a.data.subject, "Creature_A");
100                assert_eq!(a.data.predicate, "has");
101                assert_eq!(a.data.object, Some("flying"));
102            }
103            other => panic!("expected fact, got {:?}", other),
104        }
105        match &p.statements[1] {
106            Statement::Negation(a) => {
107                assert_eq!(a.data.object, Some("cold_blood"));
108            }
109            other => panic!("expected negation, got {:?}", other),
110        }
111    }
112
113    #[test]
114    fn fact_without_object() {
115        let p = prog("FACT Motor over_100\n");
116        match &p.statements[0] {
117            Statement::Fact(a) => {
118                assert_eq!(a.data.subject, "Motor");
119                assert_eq!(a.data.predicate, "over_100");
120                assert_eq!(a.data.object, None);
121            }
122            other => panic!("expected fact, got {:?}", other),
123        }
124    }
125
126    #[test]
127    fn parses_assume_positive_and_negated() {
128        let p = prog(
129            r#"
130        ASSUME rel in_prod
131        ASSUME NOT rel has_rollback
132        "#,
133        );
134        assert_eq!(p.statements.len(), 2);
135        match &p.statements[0] {
136            Statement::Assume(l) => {
137                assert!(!l.data.negated);
138                assert_eq!(l.data.atom.subject, "rel");
139                assert_eq!(l.data.atom.predicate, "in_prod");
140                assert_eq!(l.data.atom.object, None);
141            }
142            other => panic!("expected assume, got {:?}", other),
143        }
144        match &p.statements[1] {
145            Statement::Assume(l) => {
146                assert!(l.data.negated);
147                assert_eq!(l.data.atom.predicate, "has_rollback");
148            }
149            other => panic!("expected negated assume, got {:?}", other),
150        }
151    }
152
153    #[test]
154    fn assume_is_a_reserved_word() {
155        assert!(parse("FACT ASSUME has x\n").is_err());
156    }
157
158    #[test]
159    fn parses_import() {
160        let p = prog("IMPORT \"physics.vrf\"\n");
161        match &p.statements[0] {
162            Statement::Import { path, alias } => {
163                assert_eq!(path.data, "physics.vrf");
164                assert!(alias.is_none());
165            }
166            other => panic!("expected import, got {:?}", other),
167        }
168    }
169
170    #[test]
171    fn parses_import_with_alias() {
172        let p = prog("IMPORT \"physics.vrf\" AS phys\n");
173        match &p.statements[0] {
174            Statement::Import { path, alias } => {
175                assert_eq!(path.data, "physics.vrf");
176                assert_eq!(alias.as_ref().unwrap().data, "phys");
177            }
178            other => panic!("expected import, got {:?}", other),
179        }
180    }
181
182    #[test]
183    fn parses_domain_declaration() {
184        let p = prog("DOMAIN physics\n");
185        match &p.statements[0] {
186            Statement::Domain(name) => assert_eq!(name.data, "physics"),
187            other => panic!("expected domain, got {:?}", other),
188        }
189    }
190
191    #[test]
192    fn parses_domain_qualified_atom() {
193        // `physics.Motor over_200` → domain prefix split from the subject.
194        let p = prog("FACT physics.Motor over_200\n");
195        match &p.statements[0] {
196            Statement::Fact(a) => {
197                assert_eq!(a.data.domain, Some("physics"));
198                assert_eq!(a.data.subject, "Motor");
199                assert_eq!(a.data.predicate, "over_200");
200            }
201            other => panic!("expected fact, got {:?}", other),
202        }
203    }
204
205    #[test]
206    fn bare_atom_has_no_domain() {
207        let p = prog("FACT engine has_fuel\n");
208        match &p.statements[0] {
209            Statement::Fact(a) => {
210                assert_eq!(a.data.domain, None);
211                assert_eq!(a.data.subject, "engine");
212            }
213            other => panic!("expected fact, got {:?}", other),
214        }
215    }
216
217    #[test]
218    fn domain_is_a_reserved_word() {
219        assert!(parse("FACT DOMAIN has x\n").is_err());
220        assert!(parse("FACT AS has x\n").is_err());
221    }
222
223    #[test]
224    fn parses_exclusive_premise() {
225        let src = r#"
226        PREMISE fly_xor_swim:
227            EXCLUSIVE
228                Creature_A has flying
229                Creature_A has swimming
230        "#;
231        let p = prog(src);
232        match &p.statements[0] {
233            Statement::Premise { name, body, .. } => {
234                assert_eq!(name.data, "fly_xor_swim");
235                match body {
236                    Body::List { op, atoms } => {
237                        assert_eq!(*op, ListOp::Exclusive);
238                        assert_eq!(atoms.len(), 2);
239                        assert_eq!(atoms[1].data.object, Some("swimming"));
240                    }
241                    other => panic!("expected list body, got {:?}", other),
242                }
243            }
244            other => panic!("expected premise, got {:?}", other),
245        }
246    }
247
248    #[test]
249    fn parses_implication_premise_with_and() {
250        let src = r#"
251        PREMISE wings_need_bone:
252            WHEN Creature_A has flying
253            THEN Creature_A has wing
254            AND  Creature_A has bone
255        "#;
256        let p = prog(src);
257        match &p.statements[0] {
258            Statement::Premise {
259                body:
260                    Body::Impl {
261                        antecedent,
262                        consequent,
263                        ..
264                    },
265                ..
266            } => {
267                assert_eq!(antecedent.len(), 1);
268                assert_eq!(antecedent[0].data.atom.object, Some("flying"));
269                assert_eq!(consequent.len(), 2);
270                assert_eq!(consequent[0].data.atom.object, Some("wing"));
271                assert_eq!(consequent[1].data.atom.object, Some("bone"));
272            }
273            other => panic!("expected impl premise, got {:?}", other),
274        }
275    }
276
277    #[test]
278    fn antecedent_and_goes_before_then() {
279        let src = r#"
280        PREMISE deploy:
281            WHEN s tested
282            AND s reviewed
283            THEN s can_deploy
284        "#;
285        let p = prog(src);
286        match &p.statements[0] {
287            Statement::Premise {
288                body:
289                    Body::Impl {
290                        antecedent,
291                        consequent,
292                        ..
293                    },
294                ..
295            } => {
296                assert_eq!(antecedent.len(), 2);
297                assert_eq!(consequent.len(), 1);
298            }
299            other => panic!("unexpected: {:?}", other),
300        }
301    }
302
303    #[test]
304    fn when_or_sets_disjunctive_antecedent() {
305        let src = r#"
306        PREMISE p:
307            WHEN x a
308            OR x b
309            THEN x c
310        "#;
311        match &prog(src).statements[0] {
312            Statement::Premise {
313                body:
314                    Body::Impl {
315                        antecedent,
316                        ante_conn,
317                        consequent,
318                        cons_conn,
319                    },
320                ..
321            } => {
322                assert_eq!(antecedent.len(), 2);
323                assert_eq!(*ante_conn, Conn::Or);
324                assert_eq!(consequent.len(), 1);
325                assert_eq!(*cons_conn, Conn::And); // single consequent → AND
326            }
327            other => panic!("expected impl premise, got {:?}", other),
328        }
329    }
330
331    #[test]
332    fn then_or_sets_disjunctive_consequent() {
333        let src = r#"
334        PREMISE p:
335            WHEN x a
336            THEN x b
337            OR x c
338        "#;
339        match &prog(src).statements[0] {
340            Statement::Premise {
341                body:
342                    Body::Impl {
343                        consequent,
344                        cons_conn,
345                        ..
346                    },
347                ..
348            } => {
349                assert_eq!(consequent.len(), 2);
350                assert_eq!(*cons_conn, Conn::Or);
351            }
352            other => panic!("expected impl premise, got {:?}", other),
353        }
354    }
355
356    #[test]
357    fn mixing_and_or_in_one_group_is_an_error() {
358        let mixed_when = r#"
359        PREMISE p:
360            WHEN x a
361            AND x b
362            OR x c
363            THEN x d
364        "#;
365        let mixed_then = r#"
366        PREMISE p:
367            WHEN x a
368            THEN x b
369            AND x c
370            OR x d
371        "#;
372        assert!(parse(mixed_when).is_err());
373        assert!(parse(mixed_then).is_err());
374    }
375
376    #[test]
377    fn or_is_a_reserved_word() {
378        assert!(parse("FACT OR has x\n").is_err());
379    }
380
381    #[test]
382    fn parses_negated_literal_in_rule() {
383        let src = r#"
384        RULE pick_slow:
385            WHEN NOT Motor over_100
386            THEN Motor uses slow_path
387        "#;
388        let p = prog(src);
389        match &p.statements[0] {
390            Statement::Rule {
391                body: Body::Impl { antecedent, .. },
392                ..
393            } => {
394                assert!(antecedent[0].data.negated);
395                assert_eq!(antecedent[0].data.atom.predicate, "over_100");
396            }
397            other => panic!("expected rule, got {:?}", other),
398        }
399    }
400
401    #[test]
402    fn parses_check_variants() {
403        let p = prog("CHECK Creature_A BIDIRECTIONAL\n");
404        match &p.statements[0] {
405            Statement::Check {
406                subject,
407                bidirectional,
408            } => {
409                assert_eq!(subject.as_ref().unwrap().data, "Creature_A");
410                assert!(bidirectional);
411            }
412            other => panic!("expected check, got {:?}", other),
413        }
414
415        let p = prog("CHECK\n");
416        match &p.statements[0] {
417            Statement::Check {
418                subject,
419                bidirectional,
420            } => {
421                assert!(subject.is_none());
422                assert!(!bidirectional);
423            }
424            other => panic!("expected check, got {:?}", other),
425        }
426    }
427
428    #[test]
429    fn comments_and_blanks_are_ignored() {
430        let src = "// header\n\nFACT a b   // trailing comment\n\n// tail\n";
431        let p = prog(src);
432        assert_eq!(p.statements.len(), 1);
433    }
434
435    #[test]
436    fn indentation_is_cosmetic() {
437        let flat = r#"
438        PREMISE x:
439        EXCLUSIVE
440        a b
441        a c
442        "#;
443        let indented = r#"
444        PREMISE x:
445                EXCLUSIVE
446          a b
447                    a c
448        "#;
449        // Spans differ by offset (cosmetic); the parsed structure must be identical.
450        assert_eq!(atom_shapes(&prog(flat)), atom_shapes(&prog(indented)));
451    }
452
453    #[test]
454    fn top_level_statements_may_be_indented() {
455        // Leading indentation on the FACT/PREMISE/CHECK lines themselves is also
456        // cosmetic (so a whole program can be pasted indented inside a here-doc).
457        let flat = r#"
458        FACT x a
459        NOT x b
460        CHECK x
461        "#;
462        let indented = r#"
463            FACT x a
464                NOT x b
465            CHECK x
466        "#;
467        assert_eq!(atom_shapes(&prog(flat)), atom_shapes(&prog(indented)));
468        assert_eq!(prog(indented).statements.len(), 3);
469    }
470
471    #[test]
472    fn full_creature_example_parses() {
473        let src = include_str!("../../../docs/examples/creature.vrf");
474        let p = prog(src);
475        // 1 DOMAIN + 2 FACT + 3 PREMISE + 1 RULE + 1 CHECK = 8
476        assert_eq!(p.statements.len(), 8);
477    }
478
479    #[test]
480    fn import_demo_example_parses() {
481        let src = include_str!("../../../docs/examples/import-demo.vrf");
482        let p = prog(src);
483        assert!(matches!(p.statements[0], Statement::Domain(_)));
484        assert!(matches!(p.statements[1], Statement::Import { .. }));
485    }
486
487    #[test]
488    fn unicode_identifiers_any_script() {
489        // Cyrillic subject/predicate/object, mixed with `_` and digits (not first).
490        let p = prog(
491            r#"
492        FACT кот пушистый2
493        NOT собака has крылья
494        "#,
495        );
496        match &p.statements[0] {
497            Statement::Fact(a) => {
498                assert_eq!(a.data.subject, "кот");
499                assert_eq!(a.data.predicate, "пушистый2");
500                assert_eq!(a.data.object, None);
501            }
502            other => panic!("expected fact, got {:?}", other),
503        }
504        match &p.statements[1] {
505            Statement::Negation(a) => {
506                assert_eq!(a.data.subject, "собака");
507                assert_eq!(a.data.object, Some("крылья"));
508            }
509            other => panic!("expected negation, got {:?}", other),
510        }
511    }
512
513    #[test]
514    fn unicode_premise_name_and_body() {
515        let src = r#"
516        PREMISE правило_лая:
517            WHEN собака has хвост
518            THEN собака умеет_лаять
519        "#;
520        match &prog(src).statements[0] {
521            Statement::Premise { name, body, .. } => {
522                assert_eq!(name.data, "правило_лая");
523                match body {
524                    Body::Impl {
525                        antecedent,
526                        consequent,
527                        ..
528                    } => {
529                        assert_eq!(antecedent[0].data.atom.subject, "собака");
530                        assert_eq!(consequent[0].data.atom.subject, "собака");
531                        assert_eq!(consequent[0].data.atom.predicate, "умеет_лаять");
532                    }
533                    other => panic!("expected impl body, got {:?}", other),
534                }
535            }
536            other => panic!("expected premise, got {:?}", other),
537        }
538    }
539
540    #[test]
541    fn identifier_cannot_start_with_digit() {
542        // `2cats` is not a valid subject — first char must be a letter.
543        assert!(parse("FACT 2cats has fur\n").is_err());
544    }
545
546    #[test]
547    fn punctuation_is_rejected_in_identifier() {
548        // `!` and other symbols are not identifier characters.
549        assert!(parse("FACT cat! has fur\n").is_err());
550    }
551
552    #[test]
553    fn reserved_word_cannot_be_identifier() {
554        // `WHEN` as a subject is illegal.
555        assert!(parse("FACT WHEN has x\n").is_err());
556    }
557
558    #[test]
559    fn pretty_error_points_at_offending_line() {
560        let src = r#"FACT a b
561!garbage here
562FACT c d
563"#;
564        let err = parse(src).expect_err("should fail");
565        let shown = format!("{}", err);
566        // The new diagnostic format: a RESULT header, the line number, the
567        // verbatim offending line, and a caret pointing at it.
568        assert!(shown.contains("RESULT: 1 syntax error"));
569        assert!(shown.contains("line 2"));
570        assert!(shown.contains("!garbage here"));
571        assert!(shown.contains('^'));
572    }
573
574    #[test]
575    fn collects_every_error_in_one_pass() {
576        // Three broken top-level lines among valid ones → exactly three errors,
577        // no cascade from recovery.
578        let src = "FACT lonely\nFACT a b\nNOT also_lonely\nCHECK\nIMPORT nothx\n";
579        let diags = parse(src).expect_err("should fail");
580        assert_eq!(diags.len(), 3);
581    }
582
583    #[test]
584    fn crlf_line_endings() {
585        let p = prog(
586            r#"
587        FACT a b
588        CHECK a
589        "#,
590        );
591        assert_eq!(p.statements.len(), 2);
592    }
593
594    #[test]
595    fn tabs_as_indentation() {
596        let p = prog(
597            r#"
598        PREMISE e:
599	EXCLUSIVE
600		x a
601		x b
602        "#,
603        );
604        assert!(matches!(
605            p.statements[0],
606            Statement::Premise {
607                body: Body::List {
608                    op: ListOp::Exclusive,
609                    ..
610                },
611                ..
612            }
613        ));
614    }
615
616    #[test]
617    fn parses_all_list_ops() {
618        for (kw, want) in [
619            ("EXCLUSIVE", ListOp::Exclusive),
620            ("FORBIDS", ListOp::Forbids),
621            ("ONEOF", ListOp::OneOf),
622            ("ATLEAST", ListOp::AtLeast),
623        ] {
624            let src = alloc::format!("PREMISE a:\n    {kw}\n        x a\n        x b\n");
625            match &prog(&src).statements[0] {
626                Statement::Premise {
627                    body: Body::List { op, .. },
628                    ..
629                } => assert_eq!(*op, want),
630                other => panic!("{kw}: unexpected {other:?}"),
631            }
632        }
633    }
634
635    #[test]
636    fn check_bidirectional_without_subject() {
637        match &prog("CHECK BIDIRECTIONAL\n").statements[0] {
638            Statement::Check {
639                subject,
640                bidirectional,
641            } => {
642                assert!(subject.is_none());
643                assert!(bidirectional);
644            }
645            other => panic!("unexpected {other:?}"),
646        }
647    }
648
649    #[test]
650    fn empty_and_comment_only_input_yield_no_statements() {
651        assert_eq!(prog("").statements.len(), 0);
652        assert_eq!(prog("// just a comment\n\n   \n").statements.len(), 0);
653    }
654
655    #[test]
656    fn negation_with_object() {
657        match &prog("NOT Creature_A has wing\n").statements[0] {
658            Statement::Negation(a) => {
659                assert_eq!(a.data.subject, "Creature_A");
660                assert_eq!(a.data.object, Some("wing"));
661            }
662            other => panic!("unexpected {other:?}"),
663        }
664    }
665
666    #[test]
667    fn negated_consequent_then_not() {
668        let src = r#"
669        PREMISE a:
670            WHEN x on
671            THEN NOT x off
672        "#;
673        match &prog(src).statements[0] {
674            Statement::Premise {
675                body: Body::Impl { consequent, .. },
676                ..
677            } => {
678                assert!(consequent[0].data.negated);
679                assert_eq!(consequent[0].data.atom.predicate, "off");
680            }
681            other => panic!("unexpected {other:?}"),
682        }
683    }
684
685    #[test]
686    fn multiple_imports_then_facts() {
687        let p = prog(
688            r#"
689        IMPORT "a.vrf"
690        IMPORT "b.vrf"
691        FACT x y
692        "#,
693        );
694        assert!(matches!(p.statements[0], Statement::Import { .. }));
695        assert!(matches!(p.statements[1], Statement::Import { .. }));
696        assert!(matches!(p.statements[2], Statement::Fact(_)));
697    }
698
699    #[test]
700    fn trailing_comment_without_final_newline() {
701        let p = prog("FACT a b\n// trailing, no newline");
702        assert_eq!(p.statements.len(), 1);
703    }
704
705    #[test]
706    fn set_declaration_parses() {
707        let p = prog(
708            r"
709        SET tasks
710            deploy
711            backup
712        ",
713        );
714        match &p.statements[0] {
715            Statement::Set { name, elements } => {
716                assert_eq!(name.data, "tasks");
717                let got: Vec<&str> = elements.iter().map(|e| e.data).collect();
718                assert_eq!(got, ["deploy", "backup"]);
719            }
720            other => panic!("expected a SET, got {other:?}"),
721        }
722    }
723
724    #[test]
725    fn close_statement_parses() {
726        let p = prog("CLOSE depends_on TRANSITIVE\n");
727        match &p.statements[0] {
728            Statement::Close { relation, kind } => {
729                assert_eq!(relation.data, "depends_on");
730                assert_eq!(*kind, CloseKind::Transitive);
731            }
732            other => panic!("expected a CLOSE, got {other:?}"),
733        }
734    }
735
736    #[test]
737    fn for_each_header_parses_into_a_quant() {
738        let p = prog(
739            r"
740        PREMISE p FOR EACH t IN tasks:
741            ONEOF
742                t s a
743                t s b
744        ",
745        );
746        match &p.statements[0] {
747            Statement::Premise {
748                quant: Some(Quant::InSet { binder, set }),
749                ..
750            } => {
751                assert_eq!(binder.data, "t");
752                assert_eq!(set.data, "tasks");
753            }
754            other => panic!("expected a quantified premise, got {other:?}"),
755        }
756    }
757
758    #[test]
759    fn for_each_relation_header_parses_into_a_quant() {
760        let p = prog(
761            r"
762        PREMISE p FOR EACH x linked y:
763            FORBIDS
764                x a
765                y a
766        ",
767        );
768        match &p.statements[0] {
769            Statement::Premise {
770                quant:
771                    Some(Quant::Relation {
772                        left,
773                        predicate,
774                        right,
775                    }),
776                ..
777            } => {
778                assert_eq!(left.data, "x");
779                assert_eq!(predicate.data, "linked");
780                assert_eq!(right.data, "y");
781            }
782            other => panic!("expected a relation-quantified premise, got {other:?}"),
783        }
784    }
785
786    #[test]
787    fn an_unquantified_premise_has_no_quant() {
788        let p = prog(
789            r"
790        PREMISE p:
791            ONEOF
792                x s a
793                x s b
794        ",
795        );
796        assert!(matches!(
797            &p.statements[0],
798            Statement::Premise { quant: None, .. }
799        ));
800    }
801
802    #[test]
803    fn a_malformed_for_each_is_a_committed_error() {
804        // FOR without EACH commits (FOR is reserved, only a quantifier here).
805        assert!(
806            parse(
807                r"
808        PREMISE p FOR t IN tasks:
809            ONEOF
810                t s a
811                t s b
812        "
813            )
814            .is_err()
815        );
816    }
817}