// lemma/parsing/mod.rs

use crate::error::LemmaError;
use crate::limits::ResourceLimits;
use pest::iterators::Pair;
use pest::Parser;
use pest_derive::Parser;
use std::sync::Arc;

pub mod ast;
pub mod expressions;
pub mod facts;
pub mod literals;
pub mod rules;
pub mod source;
pub mod types;

pub use ast::{DepthTracker, Span};
pub use source::Source;

pub use ast::*;
20
21#[derive(Parser)]
22#[grammar = "src/parsing/lemma.pest"]
23pub struct LemmaParser;
24
25pub fn parse(
26    content: &str,
27    attribute: &str,
28    limits: &ResourceLimits,
29) -> Result<Vec<LemmaDoc>, LemmaError> {
30    if content.len() > limits.max_file_size_bytes {
31        return Err(LemmaError::ResourceLimitExceeded {
32            limit_name: "max_file_size_bytes".to_string(),
33            limit_value: format!(
34                "{} bytes ({} MB)",
35                limits.max_file_size_bytes,
36                limits.max_file_size_bytes / (1024 * 1024)
37            ),
38            actual_value: format!(
39                "{} bytes ({:.2} MB)",
40                content.len(),
41                content.len() as f64 / (1024.0 * 1024.0)
42            ),
43            suggestion: "Reduce file size or split into multiple documents".to_string(),
44        });
45    }
46
47    let mut depth_tracker = DepthTracker::with_max_depth(limits.max_expression_depth);
48
49    let source_text: Arc<str> = Arc::from(content);
50
51    match LemmaParser::parse(Rule::lemma_file, content) {
52        Ok(mut pairs) => {
53            let mut docs = Vec::new();
54            if let Some(lemma_file_pair) = pairs.next() {
55                for inner_pair in lemma_file_pair.into_inner() {
56                    if inner_pair.as_rule() == Rule::doc {
57                        docs.push(parse_doc(
58                            inner_pair,
59                            attribute,
60                            &mut depth_tracker,
61                            source_text.clone(),
62                        )?);
63                    }
64                }
65            }
66            Ok(docs)
67        }
68        Err(e) => {
69            let pest_span = match e.line_col {
70                pest::error::LineColLocation::Pos((line, col)) => Span {
71                    start: 0,
72                    end: 0,
73                    line,
74                    col,
75                },
76                pest::error::LineColLocation::Span((start_line, start_col), (_, _)) => Span {
77                    start: 0,
78                    end: 0,
79                    line: start_line,
80                    col: start_col,
81                },
82            };
83
84            Err(LemmaError::parse(
85                e.variant.to_string(),
86                Some(crate::parsing::source::Source::new(
87                    attribute,
88                    pest_span,
89                    "",
90                    source_text,
91                )),
92                None::<String>,
93            ))
94        }
95    }
96}
97
98fn parse_doc(
99    pair: Pair<Rule>,
100    attribute: &str,
101    depth_tracker: &mut DepthTracker,
102    source_text: Arc<str>,
103) -> Result<LemmaDoc, LemmaError> {
104    let doc_start_line = pair.as_span().start_pos().line_col().0;
105
106    let mut doc_name: Option<String> = None;
107    let mut commentary: Option<String> = None;
108    let mut facts = Vec::new();
109    let mut rules = Vec::new();
110    let mut types = Vec::new();
111
112    // First, extract doc_header to get commentary and doc_declaration
113    for header_item in pair.clone().into_inner() {
114        match header_item.as_rule() {
115            Rule::commentary_block => {
116                for block_inner in header_item.into_inner() {
117                    if block_inner.as_rule() == Rule::commentary {
118                        commentary = Some(block_inner.as_str().trim().to_string());
119                        break;
120                    }
121                }
122            }
123            Rule::doc_declaration => {
124                for decl_inner in header_item.into_inner() {
125                    if decl_inner.as_rule() == Rule::doc_name_local {
126                        doc_name = Some(decl_inner.as_str().to_string());
127                        break;
128                    }
129                }
130            }
131            _ => {}
132        }
133    }
134
135    let name = doc_name.ok_or_else(|| {
136        LemmaError::engine(
137            "Grammar error: doc missing doc_declaration",
138            Some(crate::parsing::source::Source::new(
139                attribute,
140                Span {
141                    start: 0,
142                    end: 0,
143                    line: 1,
144                    col: 0,
145                },
146                "",
147                source_text.clone(),
148            )),
149            None::<String>,
150        )
151    })?;
152
153    // First pass: collect all named type definitions from doc_body
154    for inner_pair in pair.clone().into_inner() {
155        if inner_pair.as_rule() == Rule::doc_body {
156            for body_item in inner_pair.into_inner() {
157                match body_item.as_rule() {
158                    Rule::type_definition => {
159                        let type_def = crate::parsing::types::parse_type_definition(
160                            body_item,
161                            attribute,
162                            &name,
163                            source_text.clone(),
164                        )?;
165                        types.push(type_def);
166                    }
167                    Rule::type_import => {
168                        let type_def = crate::parsing::types::parse_type_import(
169                            body_item,
170                            attribute,
171                            &name,
172                            source_text.clone(),
173                        )?;
174                        types.push(type_def);
175                    }
176                    _ => {}
177                }
178            }
179        }
180    }
181
182    // Second pass: parse facts and rules from doc_body
183    for inner_pair in pair.into_inner() {
184        if inner_pair.as_rule() == Rule::doc_body {
185            for body_item in inner_pair.into_inner() {
186                match body_item.as_rule() {
187                    Rule::fact_definition => {
188                        let fact = crate::parsing::facts::parse_fact_definition(
189                            body_item,
190                            attribute,
191                            &name,
192                            source_text.clone(),
193                            &types,
194                        )?;
195                        facts.push(fact);
196                    }
197                    Rule::fact_binding => {
198                        let fact = crate::parsing::facts::parse_fact_binding(
199                            body_item,
200                            attribute,
201                            &name,
202                            source_text.clone(),
203                            &types,
204                        )?;
205                        facts.push(fact);
206                    }
207                    Rule::rule_definition => {
208                        let rule = crate::parsing::rules::parse_rule_definition(
209                            body_item,
210                            depth_tracker,
211                            attribute,
212                            &name,
213                            source_text.clone(),
214                        )?;
215                        rules.push(rule);
216                    }
217                    _ => {}
218                }
219            }
220        }
221    }
222    let mut doc = LemmaDoc::new(name)
223        .with_attribute(attribute.to_string())
224        .with_start_line(doc_start_line);
225
226    if let Some(commentary_text) = commentary {
227        doc = doc.set_commentary(commentary_text);
228    }
229
230    for fact in facts {
231        doc = doc.add_fact(fact);
232    }
233    for rule in rules {
234        doc = doc.add_rule(rule);
235    }
236    for type_def in types {
237        doc = doc.add_type(type_def);
238    }
239
240    Ok(doc)
241}
242
// ============================================================================
// Tests
// ============================================================================

/// Tests for [`parse`]: document structure, accepted and rejected syntax,
/// and the source information attached to parse errors.
#[cfg(test)]
mod tests {
    use super::parse;
    use crate::LemmaError;
    use crate::ResourceLimits;

    #[test]
    fn parse_empty_input_returns_no_documents() {
        let result = parse("", "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn parse_workspace_file_yields_expected_doc_facts_and_rules() {
        let input = r#"doc person
fact name = "John Doe"
rule adult = true"#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].name, "person");
        assert_eq!(result[0].facts.len(), 1);
        assert_eq!(result[0].rules.len(), 1);
        assert_eq!(result[0].rules[0].name, "adult");
    }

    #[test]
    fn mixing_facts_and_rules_is_collected_into_doc() {
        let input = r#"doc test
fact name = "John"
rule is_adult = age >= 18
fact age = 25
rule can_drink = age >= 21
fact status = "active"
rule is_eligible = is_adult and status == "active""#;

        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].facts.len(), 3);
        assert_eq!(result[0].rules.len(), 3);
    }

    #[test]
    fn parse_simple_document_collects_facts() {
        let input = r#"doc person
fact name = "John"
fact age = 25"#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].name, "person");
        assert_eq!(result[0].facts.len(), 2);
    }

    #[test]
    fn parse_doc_name_with_slashes_is_preserved() {
        let input = r#"doc contracts/employment/jack
fact name = "Jack""#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].name, "contracts/employment/jack");
    }

    #[test]
    fn parse_commentary_block_is_attached_to_doc() {
        let input = r#"doc person
"""
This is a markdown comment
with **bold** text
"""
fact name = "John""#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].commentary.is_some());
        assert!(result[0].commentary.as_ref().unwrap().contains("**bold**"));
    }

    #[test]
    fn parse_document_with_rule_collects_rule() {
        let input = r#"doc person
rule is_adult = age >= 18"#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].rules.len(), 1);
        assert_eq!(result[0].rules[0].name, "is_adult");
    }

    #[test]
    fn parse_multiple_documents_returns_all_docs() {
        let input = r#"doc person
fact name = "John"

doc company
fact name = "Acme Corp""#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].name, "person");
        assert_eq!(result[1].name, "company");
    }

    #[test]
    fn parse_allows_duplicate_fact_names() {
        // Duplicate fact names are rejected during planning/validation, not parsing.
        let input = r#"doc person
fact name = "John"
fact name = "Jane""#;
        let result = parse(input, "test.lemma", &ResourceLimits::default());
        assert!(
            result.is_ok(),
            "Parser should succeed even with duplicate facts"
        );
    }

    #[test]
    fn parse_allows_duplicate_rule_names() {
        // Duplicate rule names are rejected during planning/validation, not parsing.
        let input = r#"doc person
rule is_adult = age >= 18
rule is_adult = age >= 21"#;
        let result = parse(input, "test.lemma", &ResourceLimits::default());
        assert!(
            result.is_ok(),
            "Parser should succeed even with duplicate rules"
        );
    }

    #[test]
    fn parse_rejects_malformed_input() {
        let input = "invalid syntax here";
        let result = parse(input, "test.lemma", &ResourceLimits::default());
        assert!(result.is_err());
    }

    #[test]
    fn parse_handles_whitespace_variants_in_expressions() {
        let test_cases = vec![
            ("doc test\nrule test = 2+3", "no spaces in arithmetic"),
            ("doc test\nrule test = age>=18", "no spaces in comparison"),
            (
                "doc test\nrule test = age >= 18 and salary>50000",
                "spaces around and keyword",
            ),
            (
                "doc test\nrule test = age  >=  18  and  salary  >  50000",
                "extra spaces",
            ),
            (
                "doc test\nrule test = \n  age >= 18 \n  and \n  salary > 50000",
                "newlines in expression",
            ),
        ];

        for (input, description) in test_cases {
            let result = parse(input, "test.lemma", &ResourceLimits::default());
            assert!(
                result.is_ok(),
                "Failed to parse {} ({}): {:?}",
                input,
                description,
                result.err()
            );
        }
    }

    #[test]
    fn parse_error_cases_are_rejected() {
        let error_cases = vec![
            (
                "doc test\nfact name = \"unclosed string",
                "unclosed string literal",
            ),
            ("doc test\nrule test = 2 + + 3", "double operator"),
            ("doc test\nrule test = (2 + 3", "unclosed parenthesis"),
            ("doc test\nrule test = 2 + 3)", "extra closing paren"),
            // Note: "invalid unit" now parses as a user-defined unit (validated during planning)
            ("doc test\nfact doc = 123", "reserved keyword as fact name"),
            (
                "doc test\nrule rule = true",
                "reserved keyword as rule name",
            ),
        ];

        for (input, description) in error_cases {
            let result = parse(input, "test.lemma", &ResourceLimits::default());
            assert!(
                result.is_err(),
                "Expected error for {} but got success",
                description
            );
        }
    }

    #[test]
    fn parse_duration_literals_in_rules() {
        let test_cases = vec![
            ("2 years", "years"),
            ("6 months", "months"),
            ("52 weeks", "weeks"),
            ("365 days", "days"),
            ("24 hours", "hours"),
            ("60 minutes", "minutes"),
            ("3600 seconds", "seconds"),
            ("1000 milliseconds", "milliseconds"),
            ("500000 microseconds", "microseconds"),
            ("50 percent", "percent"),
        ];

        for (expr, description) in test_cases {
            let input = format!("doc test\nrule test = {}", expr);
            let result = parse(&input, "test.lemma", &ResourceLimits::default());
            assert!(
                result.is_ok(),
                "Failed to parse literal {} ({}): {:?}",
                expr,
                description,
                result.err()
            );
        }
    }

    #[test]
    fn parse_comparisons_with_duration_unit_conversions() {
        let test_cases = vec![
            (
                "(duration in hours) > 2",
                "duration conversion in comparison with parens",
            ),
            (
                "(meeting_time in minutes) >= 30",
                "duration conversion with gte",
            ),
            (
                "(project_length in days) < 100",
                "duration conversion with lt",
            ),
            (
                "(delay in seconds) == 60",
                "duration conversion with equality",
            ),
            (
                "(1 hours) > (30 minutes)",
                "duration conversions on both sides",
            ),
            (
                "duration in hours > 2",
                "duration conversion without parens",
            ),
            (
                "meeting_time in seconds > 3600",
                "variable duration conversion in comparison",
            ),
            (
                "project_length in days > deadline_days",
                "two variables with duration conversion",
            ),
            (
                "duration in hours >= 1 and duration in hours <= 8",
                "multiple duration comparisons",
            ),
        ];

        for (expr, description) in test_cases {
            let input = format!("doc test\nrule test = {}", expr);
            let result = parse(&input, "test.lemma", &ResourceLimits::default());
            assert!(
                result.is_ok(),
                "Failed to parse {} ({}): {:?}",
                expr,
                description,
                result.err()
            );
        }
    }

    #[test]
    fn parse_error_includes_attribute_and_parse_error_doc_name() {
        let result = parse(
            r#"
doc test
fact name = "Unclosed string
fact age = 25
"#,
            "test.lemma",
            &ResourceLimits::default(),
        );

        match result {
            Err(LemmaError::Parse(details)) => {
                let src = details.source.as_ref().expect("should have source");
                assert_eq!(src.attribute, "test.lemma");
                // Grammar failures are reported with an empty doc name.
                assert_eq!(src.doc_name, "");
            }
            Err(e) => panic!("Expected Parse error, got: {e:?}"),
            Ok(_) => panic!("Expected parse error for unclosed string"),
        }
    }

    #[test]
    fn parse_registry_style_doc_name() {
        let input = r#"doc user/workspace/somedoc
fact name = "Alice""#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].name, "user/workspace/somedoc");
    }

    #[test]
    fn parse_fact_doc_reference_with_at_prefix() {
        let input = r#"doc example
fact external = doc @user/workspace/somedoc"#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].facts.len(), 1);
        match &result[0].facts[0].value {
            crate::FactValue::DocumentReference(doc_ref) => {
                assert_eq!(doc_ref.name, "user/workspace/somedoc");
                assert!(doc_ref.is_registry, "expected registry reference");
            }
            other => panic!("Expected DocumentReference, got: {:?}", other),
        }
    }

    #[test]
    fn parse_type_import_with_at_prefix() {
        let input = r#"doc example
type money from @lemma/std/finance
fact price = [money]"#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].types.len(), 1);
        match &result[0].types[0] {
            crate::TypeDef::Import { from, name, .. } => {
                assert_eq!(from.name, "lemma/std/finance");
                assert!(from.is_registry, "expected registry reference");
                assert_eq!(name, "money");
            }
            other => panic!("Expected Import type, got: {:?}", other),
        }
    }

    #[test]
    fn parse_multiple_registry_docs_in_same_file() {
        let input = r#"doc user/workspace/doc_a
fact x = 10

doc user/workspace/doc_b
fact y = 20
fact a = doc @user/workspace/doc_a"#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].name, "user/workspace/doc_a");
        assert_eq!(result[1].name, "user/workspace/doc_b");
    }

    #[test]
    fn parse_error_is_returned_for_garbage_input() {
        let result = parse(
            r#"
doc test
this is not valid lemma syntax @#$%
"#,
            "test.lemma",
            &ResourceLimits::default(),
        );

        assert!(result.is_err(), "Should fail on malformed input");
        match result {
            Err(LemmaError::Parse(_)) => {
                // Expected
            }
            Err(e) => panic!("Expected Parse error, got: {e:?}"),
            Ok(_) => panic!("Expected parse error"),
        }
    }
}