lemma/parsing/
mod.rs

1use crate::error::LemmaError;
2use crate::limits::ResourceLimits;
3use pest::iterators::Pair;
4use pest::Parser;
5use pest_derive::Parser;
6use std::sync::Arc;
7
8pub mod ast;
9pub mod expressions;
10pub mod facts;
11pub mod literals;
12pub mod rules;
13pub mod source;
14pub mod types;
15pub mod units;
16
17pub use ast::{DepthTracker, Span};
18pub use source::Source;
19
20pub use crate::semantic::*;
21
/// Parser type for Lemma source text.
///
/// The implementation (and the accompanying `Rule` enum used throughout this
/// module) is generated by `pest_derive` from the grammar file named in the
/// `#[grammar]` attribute below.
#[derive(Parser)]
#[grammar = "src/parsing/lemma.pest"]
pub struct LemmaParser;
25
26pub fn parse(
27    content: &str,
28    attribute: &str,
29    limits: &ResourceLimits,
30) -> Result<Vec<LemmaDoc>, LemmaError> {
31    if content.len() > limits.max_file_size_bytes {
32        return Err(LemmaError::ResourceLimitExceeded {
33            limit_name: "max_file_size_bytes".to_string(),
34            limit_value: format!(
35                "{} bytes ({} MB)",
36                limits.max_file_size_bytes,
37                limits.max_file_size_bytes / (1024 * 1024)
38            ),
39            actual_value: format!(
40                "{} bytes ({:.2} MB)",
41                content.len(),
42                content.len() as f64 / (1024.0 * 1024.0)
43            ),
44            suggestion: "Reduce file size or split into multiple documents".to_string(),
45        });
46    }
47
48    let mut depth_tracker = DepthTracker::with_max_depth(limits.max_expression_depth);
49
50    match LemmaParser::parse(Rule::lemma_file, content) {
51        Ok(mut pairs) => {
52            let mut docs = Vec::new();
53            if let Some(lemma_file_pair) = pairs.next() {
54                for inner_pair in lemma_file_pair.into_inner() {
55                    if inner_pair.as_rule() == Rule::doc {
56                        docs.push(parse_doc(inner_pair, attribute, &mut depth_tracker)?);
57                    }
58                }
59            }
60            Ok(docs)
61        }
62        Err(e) => {
63            let pest_span = match e.line_col {
64                pest::error::LineColLocation::Pos((line, col)) => Span {
65                    start: 0,
66                    end: 0,
67                    line,
68                    col,
69                },
70                pest::error::LineColLocation::Span((start_line, start_col), (_, _)) => Span {
71                    start: 0,
72                    end: 0,
73                    line: start_line,
74                    col: start_col,
75                },
76            };
77
78            Err(LemmaError::parse(
79                format!("Parse error: {}", e.variant),
80                pest_span,
81                attribute,
82                Arc::from(content),
83                "<parse-error>",
84                1,
85                None::<String>,
86            ))
87        }
88    }
89}
90
91fn parse_doc(
92    pair: Pair<Rule>,
93    attribute: &str,
94    depth_tracker: &mut DepthTracker,
95) -> Result<LemmaDoc, LemmaError> {
96    let doc_start_line = pair.as_span().start_pos().line_col().0;
97
98    let mut doc_name: Option<String> = None;
99    let mut commentary: Option<String> = None;
100    let mut facts = Vec::new();
101    let mut rules = Vec::new();
102    let mut types = Vec::new();
103
104    // First, extract doc_header to get commentary and doc_declaration
105    for header_item in pair.clone().into_inner() {
106        match header_item.as_rule() {
107            Rule::commentary_block => {
108                for block_inner in header_item.into_inner() {
109                    if block_inner.as_rule() == Rule::commentary {
110                        commentary = Some(block_inner.as_str().trim().to_string());
111                        break;
112                    }
113                }
114            }
115            Rule::doc_declaration => {
116                for decl_inner in header_item.into_inner() {
117                    if decl_inner.as_rule() == Rule::doc_name {
118                        doc_name = Some(decl_inner.as_str().to_string());
119                        break;
120                    }
121                }
122            }
123            _ => {}
124        }
125    }
126
127    let name = doc_name.ok_or_else(|| {
128        LemmaError::engine(
129            "Grammar error: doc missing doc_declaration",
130            Span {
131                start: 0,
132                end: 0,
133                line: 1,
134                col: 0,
135            },
136            "<unknown>",
137            std::sync::Arc::from(""),
138            "<unknown>",
139            1,
140            None::<String>,
141        )
142    })?;
143
144    // First pass: collect all named type definitions from doc_body
145    // These are explicit type definitions like: `type money = number -> minimum 0`
146    // and type imports like: `type money from "other_doc"`
147    // Note: Inline type definitions (e.g., `fact price = [number -> minimum 0]`) are
148    // handled during fact parsing, not collected here.
149    for inner_pair in pair.clone().into_inner() {
150        if inner_pair.as_rule() == Rule::doc_body {
151            for body_item in inner_pair.into_inner() {
152                match body_item.as_rule() {
153                    Rule::type_definition => {
154                        let type_def = crate::parsing::types::parse_type_definition(body_item)?;
155                        types.push(type_def);
156                    }
157                    Rule::type_import => {
158                        let type_def = crate::parsing::types::parse_type_import(body_item)?;
159                        types.push(type_def);
160                    }
161                    _ => {}
162                }
163            }
164        }
165    }
166
167    // Second pass: parse facts and rules from doc_body (which may reference named types via type_declaration
168    // or use inline_type_definition for inline type definitions)
169    for inner_pair in pair.into_inner() {
170        if inner_pair.as_rule() == Rule::doc_body {
171            for body_item in inner_pair.into_inner() {
172                match body_item.as_rule() {
173                    Rule::fact_definition => {
174                        let fact = crate::parsing::facts::parse_fact_definition(
175                            body_item, attribute, &name, &types,
176                        )?;
177                        facts.push(fact);
178                    }
179                    Rule::fact_override => {
180                        let fact = crate::parsing::facts::parse_fact_override(
181                            body_item, attribute, &name, &types,
182                        )?;
183                        facts.push(fact);
184                    }
185                    Rule::rule_definition => {
186                        let rule = crate::parsing::rules::parse_rule_definition(
187                            body_item,
188                            depth_tracker,
189                            attribute,
190                            &name,
191                        )?;
192                        rules.push(rule);
193                    }
194                    _ => {}
195                }
196            }
197        }
198    }
199    let mut doc = LemmaDoc::new(name)
200        .with_attribute(attribute.to_string())
201        .with_start_line(doc_start_line);
202
203    if let Some(commentary_text) = commentary {
204        doc = doc.set_commentary(commentary_text);
205    }
206
207    for fact in facts {
208        doc = doc.add_fact(fact);
209    }
210    for rule in rules {
211        doc = doc.add_rule(rule);
212    }
213    for type_def in types {
214        doc = doc.add_type(type_def);
215    }
216
217    Ok(doc)
218}
219
220// ============================================================================
221// Tests
222// ============================================================================
223
#[cfg(test)]
mod tests {
    use super::parse;
    use crate::LemmaError;
    use crate::ResourceLimits;

    /// Attribute handed to the parser by every test in this module.
    const ATTRIBUTE: &str = "test.lemma";

    /// Runs `parse` on `input` with the shared attribute and default limits.
    fn parse_source(input: &str) -> Result<Vec<super::LemmaDoc>, LemmaError> {
        parse(input, ATTRIBUTE, &ResourceLimits::default())
    }

    #[test]
    fn parse_empty_input_returns_no_documents() {
        let docs = parse_source("").unwrap();
        assert!(docs.is_empty());
    }

    #[test]
    fn parse_workspace_file_yields_expected_doc_facts_and_rules() {
        let source = r#"doc person
fact name = "John Doe"
rule adult = true"#;
        let docs = parse_source(source).unwrap();
        assert_eq!(docs.len(), 1);

        let doc = &docs[0];
        assert_eq!(doc.name, "person");
        assert_eq!(doc.facts.len(), 1);
        assert_eq!(doc.rules.len(), 1);
        assert_eq!(doc.rules[0].name, "adult");
    }

    #[test]
    fn mixing_facts_and_rules_is_collected_into_doc() {
        let source = r#"doc test
fact name = "John"
rule is_adult = age >= 18
fact age = 25
rule can_drink = age >= 21
fact status = "active"
rule is_eligible = is_adult and status == "active""#;

        let docs = parse_source(source).unwrap();
        assert_eq!(docs.len(), 1);

        let doc = &docs[0];
        assert_eq!(doc.facts.len(), 3);
        assert_eq!(doc.rules.len(), 3);
    }

    #[test]
    fn parse_simple_document_collects_facts() {
        let source = r#"doc person
fact name = "John"
fact age = 25"#;
        let docs = parse_source(source).unwrap();
        assert_eq!(docs.len(), 1);

        let doc = &docs[0];
        assert_eq!(doc.name, "person");
        assert_eq!(doc.facts.len(), 2);
    }

    #[test]
    fn parse_doc_name_with_slashes_is_preserved() {
        let source = r#"doc contracts/employment/jack
fact name = "Jack""#;
        let docs = parse_source(source).unwrap();
        assert_eq!(docs.len(), 1);
        assert_eq!(docs[0].name, "contracts/employment/jack");
    }

    #[test]
    fn parse_commentary_block_is_attached_to_doc() {
        let source = r#"doc person
"""
This is a markdown comment
with **bold** text
"""
fact name = "John""#;
        let docs = parse_source(source).unwrap();
        assert_eq!(docs.len(), 1);

        let doc = &docs[0];
        assert!(doc.commentary.is_some());
        assert!(doc.commentary.as_ref().unwrap().contains("**bold**"));
    }

    #[test]
    fn parse_document_with_rule_collects_rule() {
        let source = r#"doc person
rule is_adult = age >= 18"#;
        let docs = parse_source(source).unwrap();
        assert_eq!(docs.len(), 1);

        let doc = &docs[0];
        assert_eq!(doc.rules.len(), 1);
        assert_eq!(doc.rules[0].name, "is_adult");
    }

    #[test]
    fn parse_multiple_documents_returns_all_docs() {
        let source = r#"doc person
fact name = "John"

doc company
fact name = "Acme Corp""#;
        let docs = parse_source(source).unwrap();
        assert_eq!(docs.len(), 2);
        assert_eq!(docs[0].name, "person");
        assert_eq!(docs[1].name, "company");
    }

    #[test]
    fn parse_allows_duplicate_fact_names() {
        // Rejecting duplicate fact names is the job of planning/validation;
        // the parser itself accepts them.
        let source = r#"doc person
fact name = "John"
fact name = "Jane""#;
        let outcome = parse_source(source);
        assert!(
            outcome.is_ok(),
            "Parser should succeed even with duplicate facts"
        );
    }

    #[test]
    fn parse_allows_duplicate_rule_names() {
        // Rejecting duplicate rule names is the job of planning/validation;
        // the parser itself accepts them.
        let source = r#"doc person
rule is_adult = age >= 18
rule is_adult = age >= 21"#;
        let outcome = parse_source(source);
        assert!(
            outcome.is_ok(),
            "Parser should succeed even with duplicate rules"
        );
    }

    #[test]
    fn parse_rejects_malformed_input() {
        assert!(parse_source("invalid syntax here").is_err());
    }

    #[test]
    fn parse_handles_whitespace_variants_in_expressions() {
        // (source text, what the case exercises)
        let cases: &[(&str, &str)] = &[
            ("doc test\nrule test = 2+3", "no spaces in arithmetic"),
            ("doc test\nrule test = age>=18", "no spaces in comparison"),
            (
                "doc test\nrule test = age >= 18 and salary>50000",
                "spaces around and keyword",
            ),
            (
                "doc test\nrule test = age  >=  18  and  salary  >  50000",
                "extra spaces",
            ),
            (
                "doc test\nrule test = \n  age >= 18 \n  and \n  salary > 50000",
                "newlines in expression",
            ),
        ];

        for &(input, description) in cases {
            let outcome = parse_source(input);
            assert!(
                outcome.is_ok(),
                "Failed to parse {} ({}): {:?}",
                input,
                description,
                outcome.err()
            );
        }
    }

    #[test]
    fn parse_error_cases_are_rejected() {
        // (source text, what the case exercises)
        // An "invalid unit" case is deliberately absent: such input now parses
        // as a user-defined unit and is validated during planning.
        let cases: &[(&str, &str)] = &[
            (
                "doc test\nfact name = \"unclosed string",
                "unclosed string literal",
            ),
            ("doc test\nrule test = 2 + + 3", "double operator"),
            ("doc test\nrule test = (2 + 3", "unclosed parenthesis"),
            ("doc test\nrule test = 2 + 3)", "extra closing paren"),
            ("doc test\nfact doc = 123", "reserved keyword as fact name"),
            (
                "doc test\nrule rule = true",
                "reserved keyword as rule name",
            ),
        ];

        for &(input, description) in cases {
            let outcome = parse_source(input);
            assert!(
                outcome.is_err(),
                "Expected error for {} but got success",
                description
            );
        }
    }

    #[test]
    fn parse_duration_literals_in_rules() {
        // With the hardcoded units removed, these are the literals that are
        // still expected to parse as built-ins.
        let cases: &[(&str, &str)] = &[
            ("2 years", "years"),
            ("6 months", "months"),
            ("52 weeks", "weeks"),
            ("365 days", "days"),
            ("24 hours", "hours"),
            ("60 minutes", "minutes"),
            ("3600 seconds", "seconds"),
            ("1000 milliseconds", "milliseconds"),
            ("500000 microseconds", "microseconds"),
            ("50 percent", "percent"),
        ];

        for &(expr, description) in cases {
            let source = format!("doc test\nrule test = {}", expr);
            let outcome = parse_source(&source);
            assert!(
                outcome.is_ok(),
                "Failed to parse literal {} ({}): {:?}",
                expr,
                description,
                outcome.err()
            );
        }
    }

    #[test]
    fn parse_comparisons_with_duration_unit_conversions() {
        // With the hardcoded units removed, duration conversions are the
        // built-in conversions left to exercise.
        let cases: &[(&str, &str)] = &[
            (
                "(duration in hours) > 2",
                "duration conversion in comparison with parens",
            ),
            (
                "(meeting_time in minutes) >= 30",
                "duration conversion with gte",
            ),
            (
                "(project_length in days) < 100",
                "duration conversion with lt",
            ),
            (
                "(delay in seconds) == 60",
                "duration conversion with equality",
            ),
            (
                "(1 hours) > (30 minutes)",
                "duration conversions on both sides",
            ),
            (
                "duration in hours > 2",
                "duration conversion without parens",
            ),
            (
                "meeting_time in seconds > 3600",
                "variable duration conversion in comparison",
            ),
            (
                "project_length in days > deadline_days",
                "two variables with duration conversion",
            ),
            (
                "duration in hours >= 1 and duration in hours <= 8",
                "multiple duration comparisons",
            ),
        ];

        for &(expr, description) in cases {
            let source = format!("doc test\nrule test = {}", expr);
            let outcome = parse_source(&source);
            assert!(
                outcome.is_ok(),
                "Failed to parse {} ({}): {:?}",
                expr,
                description,
                outcome.err()
            );
        }
    }

    #[test]
    fn parse_error_includes_attribute_and_parse_error_doc_name() {
        let outcome = parse_source(
            r#"
doc test
fact name = "Unclosed string
fact age = 25
"#,
        );

        let details = match outcome {
            Ok(_) => panic!("Expected parse error for unclosed string"),
            Err(LemmaError::Parse(details)) => details,
            Err(e) => panic!("Expected Parse error, got: {e:?}"),
        };
        assert_eq!(details.source_location.attribute, "test.lemma");
        assert_eq!(details.source_location.doc_name, "<parse-error>");
    }

    #[test]
    fn parse_error_is_returned_for_garbage_input() {
        let outcome = parse_source(
            r#"
doc test
this is not valid lemma syntax @#$%
"#,
        );

        assert!(outcome.is_err(), "Should fail on malformed input");
        match outcome {
            Ok(_) => panic!("Expected parse error"),
            // Any `Parse` error is acceptable; its contents are not inspected.
            Err(LemmaError::Parse { .. }) => {}
            Err(e) => panic!("Expected Parse error, got: {e:?}"),
        }
    }
}