//! Parsing module root (`lemma/parsing/mod.rs`).

1use crate::error::Error;
2use crate::limits::ResourceLimits;
3
4pub mod ast;
5pub mod lexer;
6pub mod parser;
7pub mod source;
8
9pub use ast::{DepthTracker, Span};
10pub use source::Source;
11
12pub use ast::*;
13pub use parser::ParseResult;
14
15pub fn parse(
16    content: &str,
17    attribute: &str,
18    limits: &ResourceLimits,
19) -> Result<ParseResult, Error> {
20    parser::parse(content, attribute, limits)
21}
22
// ============================================================================
// Tests
// ============================================================================
26
#[cfg(test)]
mod tests {
    use super::{parse, ParseResult};
    use crate::parsing::ast::DataValue;
    use crate::Error;
    use crate::ResourceLimits;

    // ─── Shared helpers ──────────────────────────────────────────────

    /// Source label ("attribute") handed to the parser by every test.
    const ATTRIBUTE: &str = "test.lemma";

    /// Runs the parser on `input` with default resource limits.
    fn try_parse(input: &str) -> Result<ParseResult, Error> {
        parse(input, ATTRIBUTE, &ResourceLimits::default())
    }

    /// Parses `input`, failing the test with the input and the error when the
    /// parser rejects it.
    fn parse_ok(input: &str) -> ParseResult {
        try_parse(input)
            .unwrap_or_else(|err| panic!("expected input to parse: {input:?}\nerror: {err:?}"))
    }

    /// Wraps `expr` as the body of the only rule of a minimal spec.
    fn rule_source(expr: &str) -> String {
        format!("spec test\nrule test: {expr}")
    }

    /// Fails the test when `input` does not parse.
    fn assert_parses(input: &str, description: &str) {
        if let Err(err) = try_parse(input) {
            panic!("Failed to parse {input:?} ({description}): {err:?}");
        }
    }

    /// Fails the test when `input` parses successfully.
    fn assert_rejected(input: &str, description: &str) {
        assert!(
            try_parse(input).is_err(),
            "Expected error for {} but got success",
            description
        );
    }

    /// Unwraps the payload of a `DataValue::SpecReference`, panicking with the
    /// actual value for any other variant. A macro is used so the payload type
    /// does not have to be named here.
    macro_rules! expect_spec_ref {
        ($value:expr) => {
            match $value {
                DataValue::SpecReference(spec_ref) => spec_ref,
                other => panic!("expected SpecReference, got: {:?}", other),
            }
        };
    }

    // ─── Specs, data and rules ───────────────────────────────────────

    #[test]
    fn parse_empty_input_returns_no_specs() {
        let specs = parse_ok("").specs;
        assert_eq!(specs.len(), 0);
    }

    #[test]
    fn parse_workspace_file_yields_expected_spec_datas_and_rules() {
        let specs = parse_ok(
            r#"spec person
data name: "John Doe"
rule adult: true"#,
        )
        .specs;
        assert_eq!(specs.len(), 1);

        let person = &specs[0];
        assert_eq!(person.name, "person");
        assert_eq!(person.data.len(), 1);
        assert_eq!(person.rules.len(), 1);
        assert_eq!(person.rules[0].name, "adult");
    }

    #[test]
    fn mixing_data_and_rules_is_collected_into_spec() {
        let specs = parse_ok(
            r#"spec test
data name: "John"
rule is_adult: age >= 18
data age: 25
rule can_drink: age >= 21
data status: "active"
rule is_eligible: is_adult and status is "active""#,
        )
        .specs;
        assert_eq!(specs.len(), 1);
        assert_eq!(specs[0].data.len(), 3);
        assert_eq!(specs[0].rules.len(), 3);
    }

    #[test]
    fn parse_simple_spec_collects_data() {
        let specs = parse_ok(
            r#"spec person
data name: "John"
data age: 25"#,
        )
        .specs;
        assert_eq!(specs.len(), 1);
        assert_eq!(specs[0].name, "person");
        assert_eq!(specs[0].data.len(), 2);
    }

    #[test]
    fn parse_spec_name_with_slashes_is_preserved() {
        let specs = parse_ok(
            r#"spec contracts/employment/jack
data name: "Jack""#,
        )
        .specs;
        assert_eq!(specs.len(), 1);
        assert_eq!(specs[0].name, "contracts/employment/jack");
    }

    #[test]
    fn parse_spec_name_no_version_tag() {
        let specs = parse_ok("spec myspec\nrule x: 1").specs;
        assert_eq!(specs.len(), 1);
        assert_eq!(specs[0].name, "myspec");
        assert_eq!(specs[0].effective_from(), None);
    }

    #[test]
    fn parse_commentary_block_is_attached_to_spec() {
        let specs = parse_ok(
            r#"spec person
"""
This is a markdown comment
with **bold** text
"""
data name: "John""#,
        )
        .specs;
        assert_eq!(specs.len(), 1);

        let commentary = specs[0]
            .commentary
            .as_ref()
            .expect("commentary should be attached to the spec");
        assert!(commentary.contains("**bold**"));
    }

    #[test]
    fn parse_spec_with_rule_collects_rule() {
        let specs = parse_ok(
            r#"spec person
rule is_adult: age >= 18"#,
        )
        .specs;
        assert_eq!(specs.len(), 1);
        assert_eq!(specs[0].rules.len(), 1);
        assert_eq!(specs[0].rules[0].name, "is_adult");
    }

    #[test]
    fn parse_multiple_specs_returns_all_specs() {
        let specs = parse_ok(
            r#"spec person
data name: "John"

spec company
data name: "Acme Corp""#,
        )
        .specs;
        assert_eq!(specs.len(), 2);
        assert_eq!(specs[0].name, "person");
        assert_eq!(specs[1].name, "company");
    }

    #[test]
    fn parse_allows_duplicate_data_names() {
        let outcome = try_parse(
            r#"spec person
data name: "John"
data name: "Jane""#,
        );
        assert!(
            outcome.is_ok(),
            "Parser should succeed even with duplicate data"
        );
    }

    #[test]
    fn parse_allows_duplicate_rule_names() {
        let outcome = try_parse(
            r#"spec person
rule is_adult: age >= 18
rule is_adult: age >= 21"#,
        );
        assert!(
            outcome.is_ok(),
            "Parser should succeed even with duplicate rules"
        );
    }

    #[test]
    fn parse_rejects_malformed_input() {
        assert!(try_parse("invalid syntax here").is_err());
    }

    // ─── Expression tables ───────────────────────────────────────────

    #[test]
    fn parse_handles_whitespace_variants_in_expressions() {
        let cases = [
            ("spec test\nrule test: 2+3", "no spaces in arithmetic"),
            ("spec test\nrule test: age>=18", "no spaces in comparison"),
            (
                "spec test\nrule test: age >= 18 and salary>50000",
                "spaces around and keyword",
            ),
            (
                "spec test\nrule test: age  >=  18  and  salary  >  50000",
                "extra spaces",
            ),
            (
                "spec test\nrule test: \n  age >= 18 \n  and \n  salary > 50000",
                "newlines in expression",
            ),
        ];

        for (input, description) in cases {
            assert_parses(input, description);
        }
    }

    #[test]
    fn parse_error_cases_are_rejected() {
        let cases = [
            (
                "spec test\ndata name: \"unclosed string",
                "unclosed string literal",
            ),
            ("spec test\nrule test: (2 + 3", "unclosed parenthesis"),
            ("spec test\nrule test: 2 + 3)", "extra closing paren"),
            ("spec test\ndata spec: 123", "reserved keyword as data name"),
            (
                "spec test\nrule rule: true",
                "reserved keyword as rule name",
            ),
        ];

        for (input, description) in cases {
            assert_rejected(input, description);
        }
    }

    #[test]
    fn parse_duration_literals_in_rules() {
        // NOTE(review): `percent` is not a duration unit; it is listed here
        // because it uses the same `<number> <unit>` literal form.
        let literals = [
            ("2 years", "years"),
            ("6 months", "months"),
            ("52 weeks", "weeks"),
            ("365 days", "days"),
            ("24 hours", "hours"),
            ("60 minutes", "minutes"),
            ("3600 seconds", "seconds"),
            ("1000 milliseconds", "milliseconds"),
            ("500000 microseconds", "microseconds"),
            ("50 percent", "percent"),
        ];

        for (expr, description) in literals {
            assert_parses(&rule_source(expr), description);
        }
    }

    #[test]
    fn parse_comparisons_with_duration_unit_conversions() {
        let comparisons = [
            (
                "(duration in hours) > 2",
                "duration conversion in comparison with parens",
            ),
            (
                "(meeting_time in minutes) >= 30",
                "duration conversion with gte",
            ),
            (
                "(project_length in days) < 100",
                "duration conversion with lt",
            ),
            (
                "(delay in seconds) is 60",
                "duration conversion with equality",
            ),
            (
                "(1 hours) > (30 minutes)",
                "duration conversions on both sides",
            ),
            (
                "duration in hours > 2",
                "duration conversion without parens",
            ),
            (
                "meeting_time in seconds > 3600",
                "variable duration conversion in comparison",
            ),
            (
                "project_length in days > deadline_days",
                "two variables with duration conversion",
            ),
            (
                "duration in hours >= 1 and duration in hours <= 8",
                "multiple duration comparisons",
            ),
        ];

        for (expr, description) in comparisons {
            assert_parses(&rule_source(expr), description);
        }
    }

    // ─── Error reporting ─────────────────────────────────────────────

    // NOTE(review): the test name also mentions the spec name, but only the
    // attribute on the error source is asserted here.
    #[test]
    fn parse_error_includes_attribute_and_parse_error_spec_name() {
        let outcome = try_parse(
            r#"
spec test
data name: "Unclosed string
data age: 25
"#,
        );

        match outcome {
            Err(Error::Parsing(details)) => {
                let src = details
                    .source
                    .as_ref()
                    .expect("BUG: parsing errors always have source");
                assert_eq!(src.attribute, ATTRIBUTE);
            }
            Err(e) => panic!("Expected Parse error, got: {e:?}"),
            Ok(_) => panic!("Expected parse error for unclosed string"),
        }
    }

    // ─── Registry names and `with` references ────────────────────────

    #[test]
    fn parse_registry_style_spec_name() {
        let specs = parse_ok(
            r#"spec user/workspace/somespec
data name: "Alice""#,
        )
        .specs;
        assert_eq!(specs.len(), 1);
        assert_eq!(specs[0].name, "user/workspace/somespec");
    }

    #[test]
    fn parse_with_registry_spec_explicit_alias() {
        let specs = parse_ok(
            r#"spec example
with external: @user/workspace/somespec"#,
        )
        .specs;
        assert_eq!(specs.len(), 1);
        assert_eq!(specs[0].data.len(), 1);

        let spec_ref = expect_spec_ref!(&specs[0].data[0].value);
        assert_eq!(spec_ref.name, "@user/workspace/somespec");
        assert!(spec_ref.from_registry, "expected registry reference");
    }

    #[test]
    fn parse_multiple_registry_specs_in_same_file() {
        let specs = parse_ok(
            r#"spec user/workspace/spec_a
data x: 10

spec user/workspace/spec_b
data y: 20
with a: @user/workspace/spec_a"#,
        )
        .specs;
        assert_eq!(specs.len(), 2);
        assert_eq!(specs[0].name, "user/workspace/spec_a");
        assert_eq!(specs[1].name, "user/workspace/spec_b");
    }

    #[test]
    fn parse_with_registry_spec_default_alias() {
        let specs = parse_ok("spec example\nwith @owner/repo/somespec").specs;
        let spec_ref = expect_spec_ref!(&specs[0].data[0].value);
        assert_eq!(spec_ref.name, "@owner/repo/somespec");
        assert!(spec_ref.from_registry);
    }

    #[test]
    fn parse_with_local_spec_default_alias() {
        let specs = parse_ok("spec example\nwith myspec").specs;
        let spec_ref = expect_spec_ref!(&specs[0].data[0].value);
        assert_eq!(spec_ref.name, "myspec");
        assert!(!spec_ref.from_registry);
    }

    #[test]
    fn parse_spec_name_with_trailing_dot_is_error() {
        let outcome = try_parse("spec myspec.\ndata x: 1");
        assert!(
            outcome.is_err(),
            "Trailing dot after spec name should be a parse error"
        );
    }

    #[test]
    fn parse_multiple_specs_in_same_file() {
        let specs = parse_ok("spec myspec_a\nrule x: 1\n\nspec myspec_b\nrule x: 2").specs;
        assert_eq!(specs.len(), 2);
        assert_eq!(specs[0].name, "myspec_a");
        assert_eq!(specs[1].name, "myspec_b");
    }

    #[test]
    fn parse_with_accepts_name_only() {
        let parsed = parse_ok("spec consumer\nwith other");
        let spec_ref = expect_spec_ref!(&parsed.specs[0].data[0].value);
        assert_eq!(spec_ref.name, "other");
    }

    #[test]
    fn parse_with_bare_year_effective() {
        let parsed = parse_ok("spec consumer\nwith other 2026");
        let spec_ref = expect_spec_ref!(&parsed.specs[0].data[0].value);
        assert_eq!(spec_ref.name, "other");

        // A bare year is expected to resolve to January 1st of that year.
        let effective = spec_ref.effective.as_ref().expect("effective");
        assert_eq!(effective.year, 2026);
        assert_eq!(effective.month, 1);
        assert_eq!(effective.day, 1);
    }

    #[test]
    fn parse_with_comma_separated_bare() {
        let parsed = parse_ok("spec consumer\nwith a, b, c");
        let data = &parsed.specs[0].data;
        assert_eq!(data.len(), 3);

        for (datum, expected) in data.iter().zip(["a", "b", "c"]) {
            let spec_ref = expect_spec_ref!(&datum.value);
            assert_eq!(spec_ref.name, expected);
            assert_eq!(datum.reference.name, expected);
            assert!(spec_ref.effective.is_none());
        }
    }

    #[test]
    fn parse_with_comma_separated_paths() {
        let parsed = parse_ok("spec consumer\nwith pricing/retail, pricing/wholesale");
        let data = &parsed.specs[0].data;
        assert_eq!(data.len(), 2);

        // The default alias is the last path segment of the referenced spec.
        let retail = expect_spec_ref!(&data[0].value);
        assert_eq!(retail.name, "pricing/retail");
        assert_eq!(data[0].reference.name, "retail");

        let wholesale = expect_spec_ref!(&data[1].value);
        assert_eq!(wholesale.name, "pricing/wholesale");
        assert_eq!(data[1].reference.name, "wholesale");
    }

    #[test]
    fn parse_with_comma_separated_registry() {
        let parsed = parse_ok("spec consumer\nwith @org/repo/spec_a, @org/repo/spec_b");
        let data = &parsed.specs[0].data;
        assert_eq!(data.len(), 2);
        assert_eq!(data[0].reference.name, "spec_a");
        assert_eq!(data[1].reference.name, "spec_b");
    }

    #[test]
    fn parse_with_alias_no_comma_continuation() {
        let parsed = parse_ok("spec consumer\nwith alias: pricing/retail\ndata x: 1");
        let data = &parsed.specs[0].data;
        assert_eq!(data.len(), 2);
        assert_eq!(data[0].reference.name, "alias");

        let spec_ref = expect_spec_ref!(&data[0].value);
        assert_eq!(spec_ref.name, "pricing/retail");
    }

    #[test]
    fn parse_inline_type_from_with_effective() {
        let specs =
            parse_ok("spec consumer\ndata price: money from finance 2026-06-01 -> minimum 0")
                .specs;
        match &specs[0].data[0].value {
            DataValue::TypeDeclaration { from, .. } => {
                let spec_ref = from.as_ref().expect("expected from spec ref");
                assert_eq!(spec_ref.name, "finance");

                let effective = spec_ref
                    .effective
                    .as_ref()
                    .expect("expected effective datetime");
                assert_eq!(effective.year, 2026);
                assert_eq!(effective.month, 6);
            }
            other => panic!("expected TypeDeclaration, got: {:?}", other),
        }
    }

    #[test]
    fn parse_error_is_returned_for_garbage_input() {
        let outcome = try_parse(
            r#"
spec test
this is not valid lemma syntax @#$%
"#,
        );

        // The match is exhaustive, so a separate `is_err` assertion is not needed.
        match outcome {
            Err(Error::Parsing(_)) => {}
            Err(e) => panic!("Expected Parse error, got: {e:?}"),
            Ok(_) => panic!("Expected parse error"),
        }
    }

    // ─── Parser-level pins for DataValue variants ────────────────────

    /// `data x: a.b` (local LHS, dotted RHS) must be parsed as Reference.
    /// This is the value-copy reference form for local references.
    #[test]
    fn parse_data_with_dotted_rhs_is_reference() {
        let specs = parse_ok(
            r#"spec s
data a: number -> default 1
data x: a.something"#,
        )
        .specs;
        let x_value = &specs[0]
            .data
            .iter()
            .find(|datum| datum.reference.name == "x")
            .expect("data x not found")
            .value;
        assert!(
            matches!(x_value, DataValue::Reference { .. }),
            "dotted RHS must yield DataValue::Reference, got: {:?}",
            x_value
        );
    }

    /// `data x: a.b.c.d` (3+ segment RHS) must parse and preserve segments.
    #[test]
    fn parse_data_with_multi_segment_reference_rhs() {
        let specs = parse_ok(
            r#"spec s
data x: alpha.beta.gamma.delta"#,
        )
        .specs;
        match &specs[0].data[0].value {
            DataValue::Reference { target, .. } => {
                assert_eq!(target.segments, vec!["alpha", "beta", "gamma"]);
                assert_eq!(target.name, "delta");
            }
            other => panic!("expected Reference, got: {:?}", other),
        }
    }

    /// `data x: a.b -> minimum 5` must parse as Reference WITH the
    /// trailing constraint chain captured in `constraints`.
    #[test]
    fn parse_reference_with_trailing_constraint_captures_constraints() {
        let specs = parse_ok(
            r#"spec s
data x: foo.bar -> minimum 5"#,
        )
        .specs;
        match &specs[0].data[0].value {
            DataValue::Reference { constraints, .. } => {
                let chain = constraints.as_ref().expect("constraints expected");
                assert_eq!(
                    chain.len(),
                    1,
                    "exactly one constraint expected, got: {:?}",
                    chain
                );
            }
            other => panic!("expected Reference, got: {:?}", other),
        }
    }

    /// `data x: notdotted` (local LHS, non-dotted RHS) MUST stay a
    /// TypeDeclaration — not silently reinterpreted as a Reference. Pin the
    /// parser behavior so future refactors cannot change the shape without
    /// the test flipping.
    #[test]
    fn parse_local_non_dotted_rhs_stays_type_declaration() {
        let specs = parse_ok(
            r#"spec s
data x: myothertype"#,
        )
        .specs;
        let value = &specs[0].data[0].value;
        assert!(
            matches!(value, DataValue::TypeDeclaration { .. }),
            "non-dotted local RHS must stay TypeDeclaration, got: {:?}",
            value
        );
    }

    /// `data x.y: notdotted` (binding LHS, non-dotted RHS) IS parsed as
    /// Reference per the current implementation — even though the AST doc
    /// comment claims otherwise. Pin the real behavior.
    #[test]
    fn parse_binding_non_dotted_rhs_is_reference() {
        let specs = parse_ok(
            r#"spec s
data child.slot: somename"#,
        )
        .specs;
        let value = &specs[0].data[0].value;
        assert!(
            matches!(value, DataValue::Reference { .. }),
            "non-dotted RHS in binding context must yield Reference; got: {:?}",
            value
        );
    }

    /// Legacy syntax `data x: spec other` was removed; must be rejected.
    #[test]
    fn parse_legacy_data_colon_spec_is_rejected() {
        let msg = try_parse(
            r#"
spec s
data x: spec other
"#,
        )
        .err()
        .expect("legacy `data x: spec other` must fail to parse")
        .to_string();

        let mentions_removal = msg.contains("removed") || msg.contains("syntax");
        assert!(
            msg.contains("spec") && mentions_removal,
            "error must indicate the legacy syntax was removed, got: {msg}"
        );
    }

    /// `data x.y: z.w` (binding LHS, dotted RHS) → Reference with two LHS
    /// segments and two RHS segments.
    #[test]
    fn parse_binding_with_dotted_rhs_preserves_both_sides() {
        let specs = parse_ok(
            r#"spec s
data outer.inner: target.field"#,
        )
        .specs;
        let datum = &specs[0].data[0];
        assert_eq!(datum.reference.segments, vec!["outer"]);
        assert_eq!(datum.reference.name, "inner");

        match &datum.value {
            DataValue::Reference {
                target,
                constraints,
            } => {
                assert_eq!(target.segments, vec!["target"]);
                assert_eq!(target.name, "field");
                assert!(constraints.is_none(), "no trailing constraints expected");
            }
            other => panic!("expected Reference, got: {:?}", other),
        }
    }
}