1use crate::error::LemmaError;
2use crate::limits::ResourceLimits;
3use pest::iterators::Pair;
4use pest::Parser;
5use pest_derive::Parser;
6use std::sync::Arc;
7
8pub mod ast;
9pub mod expressions;
10pub mod facts;
11pub mod literals;
12pub mod rules;
13pub mod source;
14pub mod types;
15pub mod units;
16
17pub use ast::{DepthTracker, Span};
18pub use source::Source;
19
20pub use crate::semantic::*;
21
/// Parser for Lemma source text.
///
/// The implementation is generated by `pest_derive` from the grammar file
/// named in the `grammar` attribute below; the `Rule` enum used throughout
/// this module comes from that same derive.
#[derive(Parser)]
#[grammar = "src/parsing/lemma.pest"]
pub struct LemmaParser;
25
26pub fn parse(
27 content: &str,
28 attribute: &str,
29 limits: &ResourceLimits,
30) -> Result<Vec<LemmaDoc>, LemmaError> {
31 if content.len() > limits.max_file_size_bytes {
32 return Err(LemmaError::ResourceLimitExceeded {
33 limit_name: "max_file_size_bytes".to_string(),
34 limit_value: format!(
35 "{} bytes ({} MB)",
36 limits.max_file_size_bytes,
37 limits.max_file_size_bytes / (1024 * 1024)
38 ),
39 actual_value: format!(
40 "{} bytes ({:.2} MB)",
41 content.len(),
42 content.len() as f64 / (1024.0 * 1024.0)
43 ),
44 suggestion: "Reduce file size or split into multiple documents".to_string(),
45 });
46 }
47
48 let mut depth_tracker = DepthTracker::with_max_depth(limits.max_expression_depth);
49
50 match LemmaParser::parse(Rule::lemma_file, content) {
51 Ok(mut pairs) => {
52 let mut docs = Vec::new();
53 if let Some(lemma_file_pair) = pairs.next() {
54 for inner_pair in lemma_file_pair.into_inner() {
55 if inner_pair.as_rule() == Rule::doc {
56 docs.push(parse_doc(inner_pair, attribute, &mut depth_tracker)?);
57 }
58 }
59 }
60 Ok(docs)
61 }
62 Err(e) => {
63 let pest_span = match e.line_col {
64 pest::error::LineColLocation::Pos((line, col)) => Span {
65 start: 0,
66 end: 0,
67 line,
68 col,
69 },
70 pest::error::LineColLocation::Span((start_line, start_col), (_, _)) => Span {
71 start: 0,
72 end: 0,
73 line: start_line,
74 col: start_col,
75 },
76 };
77
78 Err(LemmaError::parse(
79 e.variant.to_string(),
80 pest_span,
81 attribute,
82 Arc::from(content),
83 "<parse-error>",
84 1,
85 None::<String>,
86 ))
87 }
88 }
89}
90
91fn parse_doc(
92 pair: Pair<Rule>,
93 attribute: &str,
94 depth_tracker: &mut DepthTracker,
95) -> Result<LemmaDoc, LemmaError> {
96 let doc_start_line = pair.as_span().start_pos().line_col().0;
97
98 let mut doc_name: Option<String> = None;
99 let mut commentary: Option<String> = None;
100 let mut facts = Vec::new();
101 let mut rules = Vec::new();
102 let mut types = Vec::new();
103
104 for header_item in pair.clone().into_inner() {
106 match header_item.as_rule() {
107 Rule::commentary_block => {
108 for block_inner in header_item.into_inner() {
109 if block_inner.as_rule() == Rule::commentary {
110 commentary = Some(block_inner.as_str().trim().to_string());
111 break;
112 }
113 }
114 }
115 Rule::doc_declaration => {
116 for decl_inner in header_item.into_inner() {
117 if decl_inner.as_rule() == Rule::doc_name {
118 doc_name = Some(decl_inner.as_str().to_string());
119 break;
120 }
121 }
122 }
123 _ => {}
124 }
125 }
126
127 let name = doc_name.ok_or_else(|| {
128 LemmaError::engine(
129 "Grammar error: doc missing doc_declaration",
130 Span {
131 start: 0,
132 end: 0,
133 line: 1,
134 col: 0,
135 },
136 attribute,
137 std::sync::Arc::from(""),
138 "<parse-error>",
139 1,
140 None::<String>,
141 )
142 })?;
143
144 for inner_pair in pair.clone().into_inner() {
150 if inner_pair.as_rule() == Rule::doc_body {
151 for body_item in inner_pair.into_inner() {
152 match body_item.as_rule() {
153 Rule::type_definition => {
154 let type_def = crate::parsing::types::parse_type_definition(
155 body_item, attribute, &name,
156 )?;
157 types.push(type_def);
158 }
159 Rule::type_import => {
160 let type_def =
161 crate::parsing::types::parse_type_import(body_item, attribute, &name)?;
162 types.push(type_def);
163 }
164 _ => {}
165 }
166 }
167 }
168 }
169
170 for inner_pair in pair.into_inner() {
173 if inner_pair.as_rule() == Rule::doc_body {
174 for body_item in inner_pair.into_inner() {
175 match body_item.as_rule() {
176 Rule::fact_definition => {
177 let fact = crate::parsing::facts::parse_fact_definition(
178 body_item, attribute, &name, &types,
179 )?;
180 facts.push(fact);
181 }
182 Rule::fact_override => {
183 let fact = crate::parsing::facts::parse_fact_override(
184 body_item, attribute, &name, &types,
185 )?;
186 facts.push(fact);
187 }
188 Rule::rule_definition => {
189 let rule = crate::parsing::rules::parse_rule_definition(
190 body_item,
191 depth_tracker,
192 attribute,
193 &name,
194 )?;
195 rules.push(rule);
196 }
197 _ => {}
198 }
199 }
200 }
201 }
202 let mut doc = LemmaDoc::new(name)
203 .with_attribute(attribute.to_string())
204 .with_start_line(doc_start_line);
205
206 if let Some(commentary_text) = commentary {
207 doc = doc.set_commentary(commentary_text);
208 }
209
210 for fact in facts {
211 doc = doc.add_fact(fact);
212 }
213 for rule in rules {
214 doc = doc.add_rule(rule);
215 }
216 for type_def in types {
217 doc = doc.add_type(type_def);
218 }
219
220 Ok(doc)
221}
222
#[cfg(test)]
mod tests {
    use super::parse;
    use crate::LemmaError;
    use crate::ResourceLimits;

    /// Attribute (source name) handed to `parse` by every test in this module.
    const ATTRIBUTE: &str = "test.lemma";

    /// Runs `parse` on `input` with `ATTRIBUTE` and the default resource limits.
    fn parse_with_defaults(input: &str) -> Result<Vec<super::LemmaDoc>, LemmaError> {
        parse(input, ATTRIBUTE, &ResourceLimits::default())
    }

    /// Empty input parses successfully and yields no documents.
    #[test]
    fn parse_empty_input_returns_no_documents() {
        let docs = parse_with_defaults("").unwrap();
        assert!(docs.is_empty());
    }

    /// A document with one fact and one rule exposes both.
    #[test]
    fn parse_workspace_file_yields_expected_doc_facts_and_rules() {
        let input = "doc person\nfact name = \"John Doe\"\nrule adult = true";
        let docs = parse_with_defaults(input).unwrap();
        assert_eq!(docs.len(), 1);

        let doc = &docs[0];
        assert_eq!(doc.name, "person");
        assert_eq!(doc.facts.len(), 1);
        assert_eq!(doc.rules.len(), 1);
        assert_eq!(doc.rules[0].name, "adult");
    }

    /// Facts and rules may be interleaved; all of them are collected.
    #[test]
    fn mixing_facts_and_rules_is_collected_into_doc() {
        let input = r#"doc test
fact name = "John"
rule is_adult = age >= 18
fact age = 25
rule can_drink = age >= 21
fact status = "active"
rule is_eligible = is_adult and status == "active""#;

        let docs = parse_with_defaults(input).unwrap();
        assert_eq!(docs.len(), 1);

        let doc = &docs[0];
        assert_eq!(doc.facts.len(), 3);
        assert_eq!(doc.rules.len(), 3);
    }

    /// A document containing only facts collects each of them.
    #[test]
    fn parse_simple_document_collects_facts() {
        let input = "doc person\nfact name = \"John\"\nfact age = 25";
        let docs = parse_with_defaults(input).unwrap();
        assert_eq!(docs.len(), 1);

        let doc = &docs[0];
        assert_eq!(doc.name, "person");
        assert_eq!(doc.facts.len(), 2);
    }

    /// Slash-separated document names are kept verbatim.
    #[test]
    fn parse_doc_name_with_slashes_is_preserved() {
        let input = "doc contracts/employment/jack\nfact name = \"Jack\"";
        let docs = parse_with_defaults(input).unwrap();
        assert_eq!(docs.len(), 1);
        assert_eq!(docs[0].name, "contracts/employment/jack");
    }

    /// A triple-quoted commentary block ends up on the document.
    #[test]
    fn parse_commentary_block_is_attached_to_doc() {
        let input = r#"doc person
"""
This is a markdown comment
with **bold** text
"""
fact name = "John""#;
        let docs = parse_with_defaults(input).unwrap();
        assert_eq!(docs.len(), 1);

        let commentary = &docs[0].commentary;
        assert!(commentary.is_some());
        assert!(commentary.as_ref().unwrap().contains("**bold**"));
    }

    /// A document containing only a rule collects that rule.
    #[test]
    fn parse_document_with_rule_collects_rule() {
        let input = "doc person\nrule is_adult = age >= 18";
        let docs = parse_with_defaults(input).unwrap();
        assert_eq!(docs.len(), 1);

        let doc = &docs[0];
        assert_eq!(doc.rules.len(), 1);
        assert_eq!(doc.rules[0].name, "is_adult");
    }

    /// Several documents in one input are returned in source order.
    #[test]
    fn parse_multiple_documents_returns_all_docs() {
        let input = r#"doc person
fact name = "John"

doc company
fact name = "Acme Corp""#;
        let docs = parse_with_defaults(input).unwrap();
        assert_eq!(docs.len(), 2);
        assert_eq!(docs[0].name, "person");
        assert_eq!(docs[1].name, "company");
    }

    /// Two facts with the same name do not make `parse` fail.
    #[test]
    fn parse_allows_duplicate_fact_names() {
        let input = "doc person\nfact name = \"John\"\nfact name = \"Jane\"";
        let outcome = parse_with_defaults(input);
        assert!(
            outcome.is_ok(),
            "Parser should succeed even with duplicate facts"
        );
    }

    /// Two rules with the same name do not make `parse` fail.
    #[test]
    fn parse_allows_duplicate_rule_names() {
        let input = "doc person\nrule is_adult = age >= 18\nrule is_adult = age >= 21";
        let outcome = parse_with_defaults(input);
        assert!(
            outcome.is_ok(),
            "Parser should succeed even with duplicate rules"
        );
    }

    /// Text that does not start a document is rejected.
    #[test]
    fn parse_rejects_malformed_input() {
        let outcome = parse_with_defaults("invalid syntax here");
        assert!(outcome.is_err());
    }

    /// Expressions parse regardless of the spacing and newlines used in these inputs.
    #[test]
    fn parse_handles_whitespace_variants_in_expressions() {
        let test_cases = [
            ("doc test\nrule test = 2+3", "no spaces in arithmetic"),
            ("doc test\nrule test = age>=18", "no spaces in comparison"),
            (
                "doc test\nrule test = age >= 18 and salary>50000",
                "spaces around and keyword",
            ),
            (
                "doc test\nrule test = age >= 18 and salary > 50000",
                "extra spaces",
            ),
            (
                "doc test\nrule test = \n age >= 18 \n and \n salary > 50000",
                "newlines in expression",
            ),
        ];

        for (input, description) in test_cases {
            let outcome = parse_with_defaults(input);
            assert!(
                outcome.is_ok(),
                "Failed to parse {} ({}): {:?}",
                input,
                description,
                outcome.err()
            );
        }
    }

    /// Each malformed input in the table must produce an error.
    #[test]
    fn parse_error_cases_are_rejected() {
        let error_cases = [
            (
                "doc test\nfact name = \"unclosed string",
                "unclosed string literal",
            ),
            ("doc test\nrule test = 2 + + 3", "double operator"),
            ("doc test\nrule test = (2 + 3", "unclosed parenthesis"),
            ("doc test\nrule test = 2 + 3)", "extra closing paren"),
            ("doc test\nfact doc = 123", "reserved keyword as fact name"),
            (
                "doc test\nrule rule = true",
                "reserved keyword as rule name",
            ),
        ];

        for (input, description) in error_cases {
            let outcome = parse_with_defaults(input);
            assert!(
                outcome.is_err(),
                "Expected error for {} but got success",
                description
            );
        }
    }

    /// Number-with-unit literals are accepted as rule expressions.
    #[test]
    fn parse_duration_literals_in_rules() {
        let test_cases = [
            ("2 years", "years"),
            ("6 months", "months"),
            ("52 weeks", "weeks"),
            ("365 days", "days"),
            ("24 hours", "hours"),
            ("60 minutes", "minutes"),
            ("3600 seconds", "seconds"),
            ("1000 milliseconds", "milliseconds"),
            ("500000 microseconds", "microseconds"),
            ("50 percent", "percent"),
        ];

        for (expr, description) in test_cases {
            let input = format!("doc test\nrule test = {}", expr);
            let outcome = parse_with_defaults(&input);
            assert!(
                outcome.is_ok(),
                "Failed to parse literal {} ({}): {:?}",
                expr,
                description,
                outcome.err()
            );
        }
    }

    /// `in <unit>` conversions can be combined with comparison operators.
    #[test]
    fn parse_comparisons_with_duration_unit_conversions() {
        let test_cases = [
            (
                "(duration in hours) > 2",
                "duration conversion in comparison with parens",
            ),
            (
                "(meeting_time in minutes) >= 30",
                "duration conversion with gte",
            ),
            (
                "(project_length in days) < 100",
                "duration conversion with lt",
            ),
            (
                "(delay in seconds) == 60",
                "duration conversion with equality",
            ),
            (
                "(1 hours) > (30 minutes)",
                "duration conversions on both sides",
            ),
            (
                "duration in hours > 2",
                "duration conversion without parens",
            ),
            (
                "meeting_time in seconds > 3600",
                "variable duration conversion in comparison",
            ),
            (
                "project_length in days > deadline_days",
                "two variables with duration conversion",
            ),
            (
                "duration in hours >= 1 and duration in hours <= 8",
                "multiple duration comparisons",
            ),
        ];

        for (expr, description) in test_cases {
            let input = format!("doc test\nrule test = {}", expr);
            let outcome = parse_with_defaults(&input);
            assert!(
                outcome.is_ok(),
                "Failed to parse {} ({}): {:?}",
                expr,
                description,
                outcome.err()
            );
        }
    }

    /// Parse errors carry the caller's attribute and the placeholder doc name.
    #[test]
    fn parse_error_includes_attribute_and_parse_error_doc_name() {
        let input = r#"
doc test
fact name = "Unclosed string
fact age = 25
"#;

        let details = match parse_with_defaults(input) {
            Err(LemmaError::Parse(details)) => details,
            Err(e) => panic!("Expected Parse error, got: {e:?}"),
            Ok(_) => panic!("Expected parse error for unclosed string"),
        };

        assert_eq!(details.source_location.attribute, ATTRIBUTE);
        assert_eq!(details.source_location.doc_name, "<parse-error>");
    }

    /// Garbage after a valid doc header surfaces as a `Parse` error.
    #[test]
    fn parse_error_is_returned_for_garbage_input() {
        let input = r#"
doc test
this is not valid lemma syntax @#$%
"#;
        let outcome = parse_with_defaults(input);

        assert!(outcome.is_err(), "Should fail on malformed input");
        match outcome {
            Err(LemmaError::Parse(_)) => {}
            Err(e) => panic!("Expected Parse error, got: {e:?}"),
            Ok(_) => panic!("Expected parse error"),
        }
    }
}