Skip to main content

lemma/parsing/
mod.rs

1use crate::error::LemmaError;
2use crate::limits::ResourceLimits;
3use pest::iterators::Pair;
4use pest::Parser;
5use pest_derive::Parser;
6use std::sync::Arc;
7
8pub mod ast;
9pub mod expressions;
10pub mod facts;
11pub mod literals;
12pub mod rules;
13pub mod source;
14pub mod types;
15pub mod units;
16
17pub use ast::{DepthTracker, Span};
18pub use source::Source;
19
20pub use crate::semantic::*;
21
/// Parser type for Lemma source text.
///
/// The `Parser` derive from `pest_derive` generates the `pest::Parser` implementation
/// and the companion `Rule` enum from the grammar file named in the `grammar` attribute.
#[derive(Parser)]
#[grammar = "src/parsing/lemma.pest"]
pub struct LemmaParser;
25
26pub fn parse(
27    content: &str,
28    attribute: &str,
29    limits: &ResourceLimits,
30) -> Result<Vec<LemmaDoc>, LemmaError> {
31    if content.len() > limits.max_file_size_bytes {
32        return Err(LemmaError::ResourceLimitExceeded {
33            limit_name: "max_file_size_bytes".to_string(),
34            limit_value: format!(
35                "{} bytes ({} MB)",
36                limits.max_file_size_bytes,
37                limits.max_file_size_bytes / (1024 * 1024)
38            ),
39            actual_value: format!(
40                "{} bytes ({:.2} MB)",
41                content.len(),
42                content.len() as f64 / (1024.0 * 1024.0)
43            ),
44            suggestion: "Reduce file size or split into multiple documents".to_string(),
45        });
46    }
47
48    let mut depth_tracker = DepthTracker::with_max_depth(limits.max_expression_depth);
49
50    match LemmaParser::parse(Rule::lemma_file, content) {
51        Ok(mut pairs) => {
52            let mut docs = Vec::new();
53            if let Some(lemma_file_pair) = pairs.next() {
54                for inner_pair in lemma_file_pair.into_inner() {
55                    if inner_pair.as_rule() == Rule::doc {
56                        docs.push(parse_doc(inner_pair, attribute, &mut depth_tracker)?);
57                    }
58                }
59            }
60            Ok(docs)
61        }
62        Err(e) => {
63            let pest_span = match e.line_col {
64                pest::error::LineColLocation::Pos((line, col)) => Span {
65                    start: 0,
66                    end: 0,
67                    line,
68                    col,
69                },
70                pest::error::LineColLocation::Span((start_line, start_col), (_, _)) => Span {
71                    start: 0,
72                    end: 0,
73                    line: start_line,
74                    col: start_col,
75                },
76            };
77
78            Err(LemmaError::parse(
79                e.variant.to_string(),
80                pest_span,
81                attribute,
82                Arc::from(content),
83                "<parse-error>",
84                1,
85                None::<String>,
86            ))
87        }
88    }
89}
90
91fn parse_doc(
92    pair: Pair<Rule>,
93    attribute: &str,
94    depth_tracker: &mut DepthTracker,
95) -> Result<LemmaDoc, LemmaError> {
96    let doc_start_line = pair.as_span().start_pos().line_col().0;
97
98    let mut doc_name: Option<String> = None;
99    let mut commentary: Option<String> = None;
100    let mut facts = Vec::new();
101    let mut rules = Vec::new();
102    let mut types = Vec::new();
103
104    // First, extract doc_header to get commentary and doc_declaration
105    for header_item in pair.clone().into_inner() {
106        match header_item.as_rule() {
107            Rule::commentary_block => {
108                for block_inner in header_item.into_inner() {
109                    if block_inner.as_rule() == Rule::commentary {
110                        commentary = Some(block_inner.as_str().trim().to_string());
111                        break;
112                    }
113                }
114            }
115            Rule::doc_declaration => {
116                for decl_inner in header_item.into_inner() {
117                    if decl_inner.as_rule() == Rule::doc_name {
118                        doc_name = Some(decl_inner.as_str().to_string());
119                        break;
120                    }
121                }
122            }
123            _ => {}
124        }
125    }
126
127    let name = doc_name.ok_or_else(|| {
128        LemmaError::engine(
129            "Grammar error: doc missing doc_declaration",
130            Span {
131                start: 0,
132                end: 0,
133                line: 1,
134                col: 0,
135            },
136            attribute,
137            std::sync::Arc::from(""),
138            "<parse-error>",
139            1,
140            None::<String>,
141        )
142    })?;
143
144    // First pass: collect all named type definitions from doc_body
145    // These are explicit type definitions like: `type money = number -> minimum 0`
146    // and type imports like: `type money from "other_doc"`
147    // Note: Inline type definitions (e.g., `fact price = [number -> minimum 0]`) are
148    // handled during fact parsing, not collected here.
149    for inner_pair in pair.clone().into_inner() {
150        if inner_pair.as_rule() == Rule::doc_body {
151            for body_item in inner_pair.into_inner() {
152                match body_item.as_rule() {
153                    Rule::type_definition => {
154                        let type_def = crate::parsing::types::parse_type_definition(
155                            body_item, attribute, &name,
156                        )?;
157                        types.push(type_def);
158                    }
159                    Rule::type_import => {
160                        let type_def =
161                            crate::parsing::types::parse_type_import(body_item, attribute, &name)?;
162                        types.push(type_def);
163                    }
164                    _ => {}
165                }
166            }
167        }
168    }
169
170    // Second pass: parse facts and rules from doc_body (which may reference named types via type_declaration
171    // or use inline_type_definition for inline type definitions)
172    for inner_pair in pair.into_inner() {
173        if inner_pair.as_rule() == Rule::doc_body {
174            for body_item in inner_pair.into_inner() {
175                match body_item.as_rule() {
176                    Rule::fact_definition => {
177                        let fact = crate::parsing::facts::parse_fact_definition(
178                            body_item, attribute, &name, &types,
179                        )?;
180                        facts.push(fact);
181                    }
182                    Rule::fact_override => {
183                        let fact = crate::parsing::facts::parse_fact_override(
184                            body_item, attribute, &name, &types,
185                        )?;
186                        facts.push(fact);
187                    }
188                    Rule::rule_definition => {
189                        let rule = crate::parsing::rules::parse_rule_definition(
190                            body_item,
191                            depth_tracker,
192                            attribute,
193                            &name,
194                        )?;
195                        rules.push(rule);
196                    }
197                    _ => {}
198                }
199            }
200        }
201    }
202    let mut doc = LemmaDoc::new(name)
203        .with_attribute(attribute.to_string())
204        .with_start_line(doc_start_line);
205
206    if let Some(commentary_text) = commentary {
207        doc = doc.set_commentary(commentary_text);
208    }
209
210    for fact in facts {
211        doc = doc.add_fact(fact);
212    }
213    for rule in rules {
214        doc = doc.add_rule(rule);
215    }
216    for type_def in types {
217        doc = doc.add_type(type_def);
218    }
219
220    Ok(doc)
221}
222
223// ============================================================================
224// Tests
225// ============================================================================
226
#[cfg(test)]
mod tests {
    use super::parse;
    use crate::LemmaError;
    use crate::ResourceLimits;

    /// Attribute (source label) handed to `parse` by every test.
    const ATTRIBUTE: &str = "test.lemma";

    #[test]
    fn parse_empty_input_returns_no_documents() {
        let docs = parse("", ATTRIBUTE, &ResourceLimits::default()).unwrap();
        assert!(docs.is_empty());
    }

    #[test]
    fn parse_workspace_file_yields_expected_doc_facts_and_rules() {
        let source = r#"doc person
fact name = "John Doe"
rule adult = true"#;
        let docs = parse(source, ATTRIBUTE, &ResourceLimits::default()).unwrap();
        assert_eq!(docs.len(), 1);

        let person = &docs[0];
        assert_eq!(person.name, "person");
        assert_eq!(person.facts.len(), 1);
        assert_eq!(person.rules.len(), 1);
        assert_eq!(person.rules[0].name, "adult");
    }

    #[test]
    fn mixing_facts_and_rules_is_collected_into_doc() {
        let source = r#"doc test
fact name = "John"
rule is_adult = age >= 18
fact age = 25
rule can_drink = age >= 21
fact status = "active"
rule is_eligible = is_adult and status == "active""#;

        let docs = parse(source, ATTRIBUTE, &ResourceLimits::default()).unwrap();
        assert_eq!(docs.len(), 1);
        assert_eq!(docs[0].facts.len(), 3);
        assert_eq!(docs[0].rules.len(), 3);
    }

    #[test]
    fn parse_simple_document_collects_facts() {
        let source = r#"doc person
fact name = "John"
fact age = 25"#;
        let docs = parse(source, ATTRIBUTE, &ResourceLimits::default()).unwrap();
        assert_eq!(docs.len(), 1);
        assert_eq!(docs[0].name, "person");
        assert_eq!(docs[0].facts.len(), 2);
    }

    #[test]
    fn parse_doc_name_with_slashes_is_preserved() {
        let source = r#"doc contracts/employment/jack
fact name = "Jack""#;
        let docs = parse(source, ATTRIBUTE, &ResourceLimits::default()).unwrap();
        assert_eq!(docs.len(), 1);
        assert_eq!(docs[0].name, "contracts/employment/jack");
    }

    #[test]
    fn parse_commentary_block_is_attached_to_doc() {
        let source = r#"doc person
"""
This is a markdown comment
with **bold** text
"""
fact name = "John""#;
        let docs = parse(source, ATTRIBUTE, &ResourceLimits::default()).unwrap();
        assert_eq!(docs.len(), 1);

        let commentary = &docs[0].commentary;
        assert!(commentary.is_some());
        assert!(commentary.as_ref().unwrap().contains("**bold**"));
    }

    #[test]
    fn parse_document_with_rule_collects_rule() {
        let source = r#"doc person
rule is_adult = age >= 18"#;
        let docs = parse(source, ATTRIBUTE, &ResourceLimits::default()).unwrap();
        assert_eq!(docs.len(), 1);
        assert_eq!(docs[0].rules.len(), 1);
        assert_eq!(docs[0].rules[0].name, "is_adult");
    }

    #[test]
    fn parse_multiple_documents_returns_all_docs() {
        let source = r#"doc person
fact name = "John"

doc company
fact name = "Acme Corp""#;
        let docs = parse(source, ATTRIBUTE, &ResourceLimits::default()).unwrap();
        assert_eq!(docs.len(), 2);
        assert_eq!(docs[0].name, "person");
        assert_eq!(docs[1].name, "company");
    }

    #[test]
    fn parse_allows_duplicate_fact_names() {
        // Duplicate fact names are rejected during planning/validation, not parsing.
        let source = r#"doc person
fact name = "John"
fact name = "Jane""#;
        let outcome = parse(source, ATTRIBUTE, &ResourceLimits::default());
        assert!(
            outcome.is_ok(),
            "Parser should succeed even with duplicate facts"
        );
    }

    #[test]
    fn parse_allows_duplicate_rule_names() {
        // Duplicate rule names are rejected during planning/validation, not parsing.
        let source = r#"doc person
rule is_adult = age >= 18
rule is_adult = age >= 21"#;
        let outcome = parse(source, ATTRIBUTE, &ResourceLimits::default());
        assert!(
            outcome.is_ok(),
            "Parser should succeed even with duplicate rules"
        );
    }

    #[test]
    fn parse_rejects_malformed_input() {
        let outcome = parse("invalid syntax here", ATTRIBUTE, &ResourceLimits::default());
        assert!(outcome.is_err());
    }

    #[test]
    fn parse_handles_whitespace_variants_in_expressions() {
        // (source text, what the case exercises)
        let accepted = [
            ("doc test\nrule test = 2+3", "no spaces in arithmetic"),
            ("doc test\nrule test = age>=18", "no spaces in comparison"),
            (
                "doc test\nrule test = age >= 18 and salary>50000",
                "spaces around and keyword",
            ),
            (
                "doc test\nrule test = age  >=  18  and  salary  >  50000",
                "extra spaces",
            ),
            (
                "doc test\nrule test = \n  age >= 18 \n  and \n  salary > 50000",
                "newlines in expression",
            ),
        ];

        for (source, description) in accepted {
            let outcome = parse(source, ATTRIBUTE, &ResourceLimits::default());
            assert!(
                outcome.is_ok(),
                "Failed to parse {} ({}): {:?}",
                source,
                description,
                outcome.err()
            );
        }
    }

    #[test]
    fn parse_error_cases_are_rejected() {
        // (source text, what the case exercises)
        let rejected = [
            (
                "doc test\nfact name = \"unclosed string",
                "unclosed string literal",
            ),
            ("doc test\nrule test = 2 + + 3", "double operator"),
            ("doc test\nrule test = (2 + 3", "unclosed parenthesis"),
            ("doc test\nrule test = 2 + 3)", "extra closing paren"),
            // Note: "invalid unit" now parses as a user-defined unit (validated during planning)
            ("doc test\nfact doc = 123", "reserved keyword as fact name"),
            (
                "doc test\nrule rule = true",
                "reserved keyword as rule name",
            ),
        ];

        for (source, description) in rejected {
            let outcome = parse(source, ATTRIBUTE, &ResourceLimits::default());
            assert!(
                outcome.is_err(),
                "Expected error for {} but got success",
                description
            );
        }
    }

    #[test]
    fn parse_duration_literals_in_rules() {
        // After removing hardcoded units, only duration units remain as built-in
        let literals = [
            ("2 years", "years"),
            ("6 months", "months"),
            ("52 weeks", "weeks"),
            ("365 days", "days"),
            ("24 hours", "hours"),
            ("60 minutes", "minutes"),
            ("3600 seconds", "seconds"),
            ("1000 milliseconds", "milliseconds"),
            ("500000 microseconds", "microseconds"),
            ("50 percent", "percent"),
        ];

        for (expr, description) in literals {
            let source = format!("doc test\nrule test = {}", expr);
            let outcome = parse(&source, ATTRIBUTE, &ResourceLimits::default());
            assert!(
                outcome.is_ok(),
                "Failed to parse literal {} ({}): {:?}",
                expr,
                description,
                outcome.err()
            );
        }
    }

    #[test]
    fn parse_comparisons_with_duration_unit_conversions() {
        // After removing hardcoded units, only duration conversions remain as built-in
        let comparisons = [
            (
                "(duration in hours) > 2",
                "duration conversion in comparison with parens",
            ),
            (
                "(meeting_time in minutes) >= 30",
                "duration conversion with gte",
            ),
            (
                "(project_length in days) < 100",
                "duration conversion with lt",
            ),
            (
                "(delay in seconds) == 60",
                "duration conversion with equality",
            ),
            (
                "(1 hours) > (30 minutes)",
                "duration conversions on both sides",
            ),
            (
                "duration in hours > 2",
                "duration conversion without parens",
            ),
            (
                "meeting_time in seconds > 3600",
                "variable duration conversion in comparison",
            ),
            (
                "project_length in days > deadline_days",
                "two variables with duration conversion",
            ),
            (
                "duration in hours >= 1 and duration in hours <= 8",
                "multiple duration comparisons",
            ),
        ];

        for (expr, description) in comparisons {
            let source = format!("doc test\nrule test = {}", expr);
            let outcome = parse(&source, ATTRIBUTE, &ResourceLimits::default());
            assert!(
                outcome.is_ok(),
                "Failed to parse {} ({}): {:?}",
                expr,
                description,
                outcome.err()
            );
        }
    }

    #[test]
    fn parse_error_includes_attribute_and_parse_error_doc_name() {
        let source = r#"
doc test
fact name = "Unclosed string
fact age = 25
"#;

        match parse(source, ATTRIBUTE, &ResourceLimits::default()) {
            Ok(_) => panic!("Expected parse error for unclosed string"),
            Err(LemmaError::Parse(details)) => {
                assert_eq!(details.source_location.attribute, "test.lemma");
                assert_eq!(details.source_location.doc_name, "<parse-error>");
            }
            Err(e) => panic!("Expected Parse error, got: {e:?}"),
        }
    }

    #[test]
    fn parse_error_is_returned_for_garbage_input() {
        let source = r#"
doc test
this is not valid lemma syntax @#$%
"#;
        let outcome = parse(source, ATTRIBUTE, &ResourceLimits::default());

        assert!(outcome.is_err(), "Should fail on malformed input");
        match outcome {
            Ok(_) => panic!("Expected parse error"),
            // The Parse variant is the expected outcome; nothing further to check.
            Err(LemmaError::Parse(..)) => {}
            Err(e) => panic!("Expected Parse error, got: {e:?}"),
        }
    }
}