1use crate::error::LemmaError;
2use crate::limits::ResourceLimits;
3use pest::iterators::Pair;
4use pest::Parser;
5use pest_derive::Parser;
6use std::sync::Arc;
7
8pub mod ast;
9pub mod expressions;
10pub mod facts;
11pub mod literals;
12pub mod rules;
13pub mod source;
14pub mod types;
15
16pub use ast::{DepthTracker, Span};
17pub use source::Source;
18
19pub use ast::*;
20
/// Entry-point parser type for Lemma source text.
///
/// The `Parser` derive builds the implementation from the grammar file named
/// in the `grammar` attribute; this module drives it through
/// `LemmaParser::parse(Rule::lemma_file, ..)`.
#[derive(Parser)]
#[grammar = "src/parsing/lemma.pest"]
pub struct LemmaParser;
24
25pub fn parse(
26 content: &str,
27 attribute: &str,
28 limits: &ResourceLimits,
29) -> Result<Vec<LemmaDoc>, LemmaError> {
30 if content.len() > limits.max_file_size_bytes {
31 return Err(LemmaError::ResourceLimitExceeded {
32 limit_name: "max_file_size_bytes".to_string(),
33 limit_value: format!(
34 "{} bytes ({} MB)",
35 limits.max_file_size_bytes,
36 limits.max_file_size_bytes / (1024 * 1024)
37 ),
38 actual_value: format!(
39 "{} bytes ({:.2} MB)",
40 content.len(),
41 content.len() as f64 / (1024.0 * 1024.0)
42 ),
43 suggestion: "Reduce file size or split into multiple documents".to_string(),
44 });
45 }
46
47 let mut depth_tracker = DepthTracker::with_max_depth(limits.max_expression_depth);
48
49 let source_text: Arc<str> = Arc::from(content);
50
51 match LemmaParser::parse(Rule::lemma_file, content) {
52 Ok(mut pairs) => {
53 let mut docs = Vec::new();
54 if let Some(lemma_file_pair) = pairs.next() {
55 for inner_pair in lemma_file_pair.into_inner() {
56 if inner_pair.as_rule() == Rule::doc {
57 docs.push(parse_doc(
58 inner_pair,
59 attribute,
60 &mut depth_tracker,
61 source_text.clone(),
62 )?);
63 }
64 }
65 }
66 Ok(docs)
67 }
68 Err(e) => {
69 let pest_span = match e.line_col {
70 pest::error::LineColLocation::Pos((line, col)) => Span {
71 start: 0,
72 end: 0,
73 line,
74 col,
75 },
76 pest::error::LineColLocation::Span((start_line, start_col), (_, _)) => Span {
77 start: 0,
78 end: 0,
79 line: start_line,
80 col: start_col,
81 },
82 };
83
84 Err(LemmaError::parse(
85 e.variant.to_string(),
86 Some(crate::parsing::source::Source::new(
87 attribute,
88 pest_span,
89 "",
90 source_text,
91 )),
92 None::<String>,
93 ))
94 }
95 }
96}
97
98fn parse_doc(
99 pair: Pair<Rule>,
100 attribute: &str,
101 depth_tracker: &mut DepthTracker,
102 source_text: Arc<str>,
103) -> Result<LemmaDoc, LemmaError> {
104 let doc_start_line = pair.as_span().start_pos().line_col().0;
105
106 let mut doc_name: Option<String> = None;
107 let mut commentary: Option<String> = None;
108 let mut facts = Vec::new();
109 let mut rules = Vec::new();
110 let mut types = Vec::new();
111
112 for header_item in pair.clone().into_inner() {
114 match header_item.as_rule() {
115 Rule::commentary_block => {
116 for block_inner in header_item.into_inner() {
117 if block_inner.as_rule() == Rule::commentary {
118 commentary = Some(block_inner.as_str().trim().to_string());
119 break;
120 }
121 }
122 }
123 Rule::doc_declaration => {
124 for decl_inner in header_item.into_inner() {
125 if decl_inner.as_rule() == Rule::doc_name_local {
126 doc_name = Some(decl_inner.as_str().to_string());
127 break;
128 }
129 }
130 }
131 _ => {}
132 }
133 }
134
135 let name = doc_name.ok_or_else(|| {
136 LemmaError::engine(
137 "Grammar error: doc missing doc_declaration",
138 Some(crate::parsing::source::Source::new(
139 attribute,
140 Span {
141 start: 0,
142 end: 0,
143 line: 1,
144 col: 0,
145 },
146 "",
147 source_text.clone(),
148 )),
149 None::<String>,
150 )
151 })?;
152
153 for inner_pair in pair.clone().into_inner() {
155 if inner_pair.as_rule() == Rule::doc_body {
156 for body_item in inner_pair.into_inner() {
157 match body_item.as_rule() {
158 Rule::type_definition => {
159 let type_def = crate::parsing::types::parse_type_definition(
160 body_item,
161 attribute,
162 &name,
163 source_text.clone(),
164 )?;
165 types.push(type_def);
166 }
167 Rule::type_import => {
168 let type_def = crate::parsing::types::parse_type_import(
169 body_item,
170 attribute,
171 &name,
172 source_text.clone(),
173 )?;
174 types.push(type_def);
175 }
176 _ => {}
177 }
178 }
179 }
180 }
181
182 for inner_pair in pair.into_inner() {
184 if inner_pair.as_rule() == Rule::doc_body {
185 for body_item in inner_pair.into_inner() {
186 match body_item.as_rule() {
187 Rule::fact_definition => {
188 let fact = crate::parsing::facts::parse_fact_definition(
189 body_item,
190 attribute,
191 &name,
192 source_text.clone(),
193 &types,
194 )?;
195 facts.push(fact);
196 }
197 Rule::fact_binding => {
198 let fact = crate::parsing::facts::parse_fact_binding(
199 body_item,
200 attribute,
201 &name,
202 source_text.clone(),
203 &types,
204 )?;
205 facts.push(fact);
206 }
207 Rule::rule_definition => {
208 let rule = crate::parsing::rules::parse_rule_definition(
209 body_item,
210 depth_tracker,
211 attribute,
212 &name,
213 source_text.clone(),
214 )?;
215 rules.push(rule);
216 }
217 _ => {}
218 }
219 }
220 }
221 }
222 let mut doc = LemmaDoc::new(name)
223 .with_attribute(attribute.to_string())
224 .with_start_line(doc_start_line);
225
226 if let Some(commentary_text) = commentary {
227 doc = doc.set_commentary(commentary_text);
228 }
229
230 for fact in facts {
231 doc = doc.add_fact(fact);
232 }
233 for rule in rules {
234 doc = doc.add_rule(rule);
235 }
236 for type_def in types {
237 doc = doc.add_type(type_def);
238 }
239
240 Ok(doc)
241}
242
// Unit tests for `parse`: every test feeds Lemma source text through the
// public entry point with default resource limits and inspects the outcome.
#[cfg(test)]
mod tests {
    use super::parse;
    use crate::LemmaError;
    use crate::ResourceLimits;

    // Empty input is accepted and produces no documents.
    #[test]
    fn parse_empty_input_returns_no_documents() {
        let result = parse("", "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 0);
    }

    // A document with one fact and one rule exposes its name, fact and rule.
    #[test]
    fn parse_workspace_file_yields_expected_doc_facts_and_rules() {
        let input = r#"doc person
fact name = "John Doe"
rule adult = true"#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].name, "person");
        assert_eq!(result[0].facts.len(), 1);
        assert_eq!(result[0].rules.len(), 1);
        assert_eq!(result[0].rules[0].name, "adult");
    }

    // Facts and rules may be interleaved; all of them end up in the document.
    #[test]
    fn mixing_facts_and_rules_is_collected_into_doc() {
        let input = r#"doc test
fact name = "John"
rule is_adult = age >= 18
fact age = 25
rule can_drink = age >= 21
fact status = "active"
rule is_eligible = is_adult and status == "active""#;

        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].facts.len(), 3);
        assert_eq!(result[0].rules.len(), 3);
    }

    // A document containing only facts collects each of them.
    #[test]
    fn parse_simple_document_collects_facts() {
        let input = r#"doc person
fact name = "John"
fact age = 25"#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].name, "person");
        assert_eq!(result[0].facts.len(), 2);
    }

    // Slash-separated doc names are kept verbatim.
    #[test]
    fn parse_doc_name_with_slashes_is_preserved() {
        let input = r#"doc contracts/employment/jack
fact name = "Jack""#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].name, "contracts/employment/jack");
    }

    // A triple-quoted commentary block is stored on the document.
    #[test]
    fn parse_commentary_block_is_attached_to_doc() {
        let input = r#"doc person
"""
This is a markdown comment
with **bold** text
"""
fact name = "John""#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].commentary.is_some());
        assert!(result[0].commentary.as_ref().unwrap().contains("**bold**"));
    }

    // A document containing only a rule collects that rule under its name.
    #[test]
    fn parse_document_with_rule_collects_rule() {
        let input = r#"doc person
rule is_adult = age >= 18"#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].rules.len(), 1);
        assert_eq!(result[0].rules[0].name, "is_adult");
    }

    // Several `doc` sections in one input come back in source order.
    #[test]
    fn parse_multiple_documents_returns_all_docs() {
        let input = r#"doc person
fact name = "John"

doc company
fact name = "Acme Corp""#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].name, "person");
        assert_eq!(result[1].name, "company");
    }

    // Duplicate fact names are not rejected at parse time.
    #[test]
    fn parse_allows_duplicate_fact_names() {
        let input = r#"doc person
fact name = "John"
fact name = "Jane""#;
        let result = parse(input, "test.lemma", &ResourceLimits::default());
        assert!(
            result.is_ok(),
            "Parser should succeed even with duplicate facts"
        );
    }

    // Duplicate rule names are not rejected at parse time either.
    #[test]
    fn parse_allows_duplicate_rule_names() {
        let input = r#"doc person
rule is_adult = age >= 18
rule is_adult = age >= 21"#;
        let result = parse(input, "test.lemma", &ResourceLimits::default());
        assert!(
            result.is_ok(),
            "Parser should succeed even with duplicate rules"
        );
    }

    // Text that does not start with a `doc` declaration is an error.
    #[test]
    fn parse_rejects_malformed_input() {
        let input = "invalid syntax here";
        let result = parse(input, "test.lemma", &ResourceLimits::default());
        assert!(result.is_err());
    }

    // Table-driven: each rule expression must parse regardless of how
    // whitespace is placed around its operators and keywords.
    #[test]
    fn parse_handles_whitespace_variants_in_expressions() {
        let test_cases = vec![
            ("doc test\nrule test = 2+3", "no spaces in arithmetic"),
            ("doc test\nrule test = age>=18", "no spaces in comparison"),
            (
                "doc test\nrule test = age >= 18 and salary>50000",
                "spaces around and keyword",
            ),
            (
                "doc test\nrule test = age >= 18 and salary > 50000",
                "extra spaces",
            ),
            (
                "doc test\nrule test = \n age >= 18 \n and \n salary > 50000",
                "newlines in expression",
            ),
        ];

        for (input, description) in test_cases {
            let result = parse(input, "test.lemma", &ResourceLimits::default());
            assert!(
                result.is_ok(),
                "Failed to parse {} ({}): {:?}",
                input,
                description,
                result.err()
            );
        }
    }

    // Table-driven: each malformed input must be rejected.
    #[test]
    fn parse_error_cases_are_rejected() {
        let error_cases = vec![
            (
                "doc test\nfact name = \"unclosed string",
                "unclosed string literal",
            ),
            ("doc test\nrule test = 2 + + 3", "double operator"),
            ("doc test\nrule test = (2 + 3", "unclosed parenthesis"),
            ("doc test\nrule test = 2 + 3)", "extra closing paren"),
            ("doc test\nfact doc = 123", "reserved keyword as fact name"),
            (
                "doc test\nrule rule = true",
                "reserved keyword as rule name",
            ),
        ];

        for (input, description) in error_cases {
            let result = parse(input, "test.lemma", &ResourceLimits::default());
            assert!(
                result.is_err(),
                "Expected error for {} but got success",
                description
            );
        }
    }

    // Table-driven: a number followed by a unit word must parse as a rule
    // expression (the table also includes `percent`).
    #[test]
    fn parse_duration_literals_in_rules() {
        let test_cases = vec![
            ("2 years", "years"),
            ("6 months", "months"),
            ("52 weeks", "weeks"),
            ("365 days", "days"),
            ("24 hours", "hours"),
            ("60 minutes", "minutes"),
            ("3600 seconds", "seconds"),
            ("1000 milliseconds", "milliseconds"),
            ("500000 microseconds", "microseconds"),
            ("50 percent", "percent"),
        ];

        for (expr, description) in test_cases {
            let input = format!("doc test\nrule test = {}", expr);
            let result = parse(&input, "test.lemma", &ResourceLimits::default());
            assert!(
                result.is_ok(),
                "Failed to parse literal {} ({}): {:?}",
                expr,
                description,
                result.err()
            );
        }
    }

    // Table-driven: `<value> in <unit>` conversions must parse when combined
    // with comparison operators, with and without parentheses.
    #[test]
    fn parse_comparisons_with_duration_unit_conversions() {
        let test_cases = vec![
            (
                "(duration in hours) > 2",
                "duration conversion in comparison with parens",
            ),
            (
                "(meeting_time in minutes) >= 30",
                "duration conversion with gte",
            ),
            (
                "(project_length in days) < 100",
                "duration conversion with lt",
            ),
            (
                "(delay in seconds) == 60",
                "duration conversion with equality",
            ),
            (
                "(1 hours) > (30 minutes)",
                "duration conversions on both sides",
            ),
            (
                "duration in hours > 2",
                "duration conversion without parens",
            ),
            (
                "meeting_time in seconds > 3600",
                "variable duration conversion in comparison",
            ),
            (
                "project_length in days > deadline_days",
                "two variables with duration conversion",
            ),
            (
                "duration in hours >= 1 and duration in hours <= 8",
                "multiple duration comparisons",
            ),
        ];

        for (expr, description) in test_cases {
            let input = format!("doc test\nrule test = {}", expr);
            let result = parse(&input, "test.lemma", &ResourceLimits::default());
            assert!(
                result.is_ok(),
                "Failed to parse {} ({}): {:?}",
                expr,
                description,
                result.err()
            );
        }
    }

    // A grammar failure surfaces as `LemmaError::Parse` whose source carries
    // the attribute given to `parse` and an empty doc name.
    #[test]
    fn parse_error_includes_attribute_and_parse_error_doc_name() {
        let result = parse(
            r#"
doc test
fact name = "Unclosed string
fact age = 25
"#,
            "test.lemma",
            &ResourceLimits::default(),
        );

        match result {
            Err(LemmaError::Parse(details)) => {
                let src = details.source.as_ref().expect("should have source");
                assert_eq!(src.attribute, "test.lemma");
                assert_eq!(src.doc_name, "");
            }
            Err(e) => panic!("Expected Parse error, got: {e:?}"),
            Ok(_) => panic!("Expected parse error for unclosed string"),
        }
    }

    // A three-segment, registry-style doc name is kept verbatim.
    #[test]
    fn parse_registry_style_doc_name() {
        let input = r#"doc user/workspace/somedoc
fact name = "Alice""#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].name, "user/workspace/somedoc");
    }

    // `doc @name` as a fact value becomes a document reference: the `@` is
    // dropped from the stored name and `is_registry` is set.
    #[test]
    fn parse_fact_doc_reference_with_at_prefix() {
        let input = r#"doc example
fact external = doc @user/workspace/somedoc"#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].facts.len(), 1);
        match &result[0].facts[0].value {
            crate::FactValue::DocumentReference(doc_ref) => {
                assert_eq!(doc_ref.name, "user/workspace/somedoc");
                assert!(doc_ref.is_registry, "expected registry reference");
            }
            other => panic!("Expected DocumentReference, got: {:?}", other),
        }
    }

    // `type <name> from @<doc>` becomes an import whose source has the `@`
    // dropped from its name and `is_registry` set.
    #[test]
    fn parse_type_import_with_at_prefix() {
        let input = r#"doc example
type money from @lemma/std/finance
fact price = [money]"#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].types.len(), 1);
        match &result[0].types[0] {
            crate::TypeDef::Import { from, name, .. } => {
                assert_eq!(from.name, "lemma/std/finance");
                assert!(from.is_registry, "expected registry reference");
                assert_eq!(name, "money");
            }
            other => panic!("Expected Import type, got: {:?}", other),
        }
    }

    // Two registry-style documents in one input, the second referencing the
    // first, both parse and keep their names.
    #[test]
    fn parse_multiple_registry_docs_in_same_file() {
        let input = r#"doc user/workspace/doc_a
fact x = 10

doc user/workspace/doc_b
fact y = 20
fact a = doc @user/workspace/doc_a"#;
        let result = parse(input, "test.lemma", &ResourceLimits::default()).unwrap();
        assert_eq!(result.len(), 2);
        assert_eq!(result[0].name, "user/workspace/doc_a");
        assert_eq!(result[1].name, "user/workspace/doc_b");
    }

    // Unparseable text after a valid doc declaration yields the `Parse`
    // variant specifically, not some other error kind.
    #[test]
    fn parse_error_is_returned_for_garbage_input() {
        let result = parse(
            r#"
doc test
this is not valid lemma syntax @#$%
"#,
            "test.lemma",
            &ResourceLimits::default(),
        );

        assert!(result.is_err(), "Should fail on malformed input");
        match result {
            Err(LemmaError::Parse { .. }) => {
                // Expected variant; nothing further is checked here.
            }
            Err(e) => panic!("Expected Parse error, got: {e:?}"),
            Ok(_) => panic!("Expected parse error"),
        }
    }
}
619}