1use crate::error::LemmaError;
2use crate::limits::ResourceLimits;
3use pest::iterators::Pair;
4use pest::Parser;
5use pest_derive::Parser;
6use std::sync::Arc;
7
8pub mod ast;
9pub mod expressions;
10pub mod facts;
11pub mod literals;
12pub mod rules;
13pub mod source;
14pub mod types;
15pub mod units;
16
17pub use ast::{DepthTracker, Span};
18pub use source::Source;
19
20pub use crate::semantic::*;
21
/// Pest parser for Lemma source text.
///
/// The `Parser` implementation and the accompanying `Rule` enum are generated by
/// `pest_derive` from the grammar file named in the `#[grammar]` attribute.
#[derive(Parser)]
#[grammar = "src/parsing/lemma.pest"]
pub struct LemmaParser;
25
26pub fn parse(
27 content: &str,
28 attribute: &str,
29 limits: &ResourceLimits,
30) -> Result<Vec<LemmaDoc>, LemmaError> {
31 if content.len() > limits.max_file_size_bytes {
32 return Err(LemmaError::ResourceLimitExceeded {
33 limit_name: "max_file_size_bytes".to_string(),
34 limit_value: format!(
35 "{} bytes ({} MB)",
36 limits.max_file_size_bytes,
37 limits.max_file_size_bytes / (1024 * 1024)
38 ),
39 actual_value: format!(
40 "{} bytes ({:.2} MB)",
41 content.len(),
42 content.len() as f64 / (1024.0 * 1024.0)
43 ),
44 suggestion: "Reduce file size or split into multiple documents".to_string(),
45 });
46 }
47
48 let mut depth_tracker = DepthTracker::with_max_depth(limits.max_expression_depth);
49
50 match LemmaParser::parse(Rule::lemma_file, content) {
51 Ok(mut pairs) => {
52 let mut docs = Vec::new();
53 if let Some(lemma_file_pair) = pairs.next() {
54 for inner_pair in lemma_file_pair.into_inner() {
55 if inner_pair.as_rule() == Rule::doc {
56 docs.push(parse_doc(inner_pair, attribute, &mut depth_tracker)?);
57 }
58 }
59 }
60 Ok(docs)
61 }
62 Err(e) => {
63 let pest_span = match e.line_col {
64 pest::error::LineColLocation::Pos((line, col)) => Span {
65 start: 0,
66 end: 0,
67 line,
68 col,
69 },
70 pest::error::LineColLocation::Span((start_line, start_col), (_, _)) => Span {
71 start: 0,
72 end: 0,
73 line: start_line,
74 col: start_col,
75 },
76 };
77
78 Err(LemmaError::parse(
79 format!("Parse error: {}", e.variant),
80 pest_span,
81 attribute,
82 Arc::from(content),
83 "<parse-error>",
84 1,
85 None::<String>,
86 ))
87 }
88 }
89}
90
91fn parse_doc(
92 pair: Pair<Rule>,
93 attribute: &str,
94 depth_tracker: &mut DepthTracker,
95) -> Result<LemmaDoc, LemmaError> {
96 let doc_start_line = pair.as_span().start_pos().line_col().0;
97
98 let mut doc_name: Option<String> = None;
99 let mut commentary: Option<String> = None;
100 let mut facts = Vec::new();
101 let mut rules = Vec::new();
102 let mut types = Vec::new();
103
104 for header_item in pair.clone().into_inner() {
106 match header_item.as_rule() {
107 Rule::commentary_block => {
108 for block_inner in header_item.into_inner() {
109 if block_inner.as_rule() == Rule::commentary {
110 commentary = Some(block_inner.as_str().trim().to_string());
111 break;
112 }
113 }
114 }
115 Rule::doc_declaration => {
116 for decl_inner in header_item.into_inner() {
117 if decl_inner.as_rule() == Rule::doc_name {
118 doc_name = Some(decl_inner.as_str().to_string());
119 break;
120 }
121 }
122 }
123 _ => {}
124 }
125 }
126
127 let name = doc_name.ok_or_else(|| {
128 LemmaError::engine(
129 "Grammar error: doc missing doc_declaration",
130 Span {
131 start: 0,
132 end: 0,
133 line: 1,
134 col: 0,
135 },
136 "<unknown>",
137 std::sync::Arc::from(""),
138 "<unknown>",
139 1,
140 None::<String>,
141 )
142 })?;
143
144 for inner_pair in pair.clone().into_inner() {
150 if inner_pair.as_rule() == Rule::doc_body {
151 for body_item in inner_pair.into_inner() {
152 match body_item.as_rule() {
153 Rule::type_definition => {
154 let type_def = crate::parsing::types::parse_type_definition(body_item)?;
155 types.push(type_def);
156 }
157 Rule::type_import => {
158 let type_def = crate::parsing::types::parse_type_import(body_item)?;
159 types.push(type_def);
160 }
161 _ => {}
162 }
163 }
164 }
165 }
166
167 for inner_pair in pair.into_inner() {
170 if inner_pair.as_rule() == Rule::doc_body {
171 for body_item in inner_pair.into_inner() {
172 match body_item.as_rule() {
173 Rule::fact_definition => {
174 let fact = crate::parsing::facts::parse_fact_definition(
175 body_item, attribute, &name, &types,
176 )?;
177 facts.push(fact);
178 }
179 Rule::fact_override => {
180 let fact = crate::parsing::facts::parse_fact_override(
181 body_item, attribute, &name, &types,
182 )?;
183 facts.push(fact);
184 }
185 Rule::rule_definition => {
186 let rule = crate::parsing::rules::parse_rule_definition(
187 body_item,
188 depth_tracker,
189 attribute,
190 &name,
191 )?;
192 rules.push(rule);
193 }
194 _ => {}
195 }
196 }
197 }
198 }
199 let mut doc = LemmaDoc::new(name)
200 .with_attribute(attribute.to_string())
201 .with_start_line(doc_start_line);
202
203 if let Some(commentary_text) = commentary {
204 doc = doc.set_commentary(commentary_text);
205 }
206
207 for fact in facts {
208 doc = doc.add_fact(fact);
209 }
210 for rule in rules {
211 doc = doc.add_rule(rule);
212 }
213 for type_def in types {
214 doc = doc.add_type(type_def);
215 }
216
217 Ok(doc)
218}
219
#[cfg(test)]
mod tests {
    use super::parse;
    use crate::LemmaError;
    use crate::ResourceLimits;

    /// Attribute handed to every `parse` call in this module.
    const ATTRIBUTE: &str = "test.lemma";

    /// Runs `parse` on `input` with [`ATTRIBUTE`] and the default resource limits.
    fn parse_with_defaults(input: &str) -> Result<Vec<super::LemmaDoc>, LemmaError> {
        parse(input, ATTRIBUTE, &ResourceLimits::default())
    }

    /// Empty input parses successfully and yields no documents.
    #[test]
    fn parse_empty_input_returns_no_documents() {
        let docs = parse_with_defaults("").unwrap();
        assert!(docs.is_empty());
    }

    /// A doc with one fact and one rule exposes its name, fact, and rule.
    #[test]
    fn parse_workspace_file_yields_expected_doc_facts_and_rules() {
        let docs =
            parse_with_defaults("doc person\nfact name = \"John Doe\"\nrule adult = true").unwrap();
        assert_eq!(docs.len(), 1);

        let doc = &docs[0];
        assert_eq!(doc.name, "person");
        assert_eq!(doc.facts.len(), 1);
        assert_eq!(doc.rules.len(), 1);
        assert_eq!(doc.rules[0].name, "adult");
    }

    /// Interleaved facts and rules are all collected into the same doc.
    #[test]
    fn mixing_facts_and_rules_is_collected_into_doc() {
        let source = concat!(
            "doc test\n",
            "fact name = \"John\"\n",
            "rule is_adult = age >= 18\n",
            "fact age = 25\n",
            "rule can_drink = age >= 21\n",
            "fact status = \"active\"\n",
            "rule is_eligible = is_adult and status == \"active\"",
        );

        let docs = parse_with_defaults(source).unwrap();
        assert_eq!(docs.len(), 1);
        assert_eq!(docs[0].facts.len(), 3);
        assert_eq!(docs[0].rules.len(), 3);
    }

    /// Two facts in one doc are both collected.
    #[test]
    fn parse_simple_document_collects_facts() {
        let docs =
            parse_with_defaults("doc person\nfact name = \"John\"\nfact age = 25").unwrap();
        assert_eq!(docs.len(), 1);

        let doc = &docs[0];
        assert_eq!(doc.name, "person");
        assert_eq!(doc.facts.len(), 2);
    }

    /// Slashes in a doc name are kept verbatim.
    #[test]
    fn parse_doc_name_with_slashes_is_preserved() {
        let docs =
            parse_with_defaults("doc contracts/employment/jack\nfact name = \"Jack\"").unwrap();
        assert_eq!(docs.len(), 1);
        assert_eq!(docs[0].name, "contracts/employment/jack");
    }

    /// A triple-quoted commentary block ends up in the doc's commentary.
    #[test]
    fn parse_commentary_block_is_attached_to_doc() {
        let source = concat!(
            "doc person\n",
            "\"\"\"\n",
            "This is a markdown comment\n",
            "with **bold** text\n",
            "\"\"\"\n",
            "fact name = \"John\"",
        );

        let docs = parse_with_defaults(source).unwrap();
        assert_eq!(docs.len(), 1);

        let commentary = docs[0].commentary.as_ref();
        assert!(commentary.is_some());
        assert!(commentary.unwrap().contains("**bold**"));
    }

    /// A single rule is collected under its declared name.
    #[test]
    fn parse_document_with_rule_collects_rule() {
        let docs = parse_with_defaults("doc person\nrule is_adult = age >= 18").unwrap();
        assert_eq!(docs.len(), 1);

        let rules = &docs[0].rules;
        assert_eq!(rules.len(), 1);
        assert_eq!(rules[0].name, "is_adult");
    }

    /// Several docs in one input are returned in source order.
    #[test]
    fn parse_multiple_documents_returns_all_docs() {
        let source = concat!(
            "doc person\n",
            "fact name = \"John\"\n",
            "\n",
            "doc company\n",
            "fact name = \"Acme Corp\"",
        );

        let docs = parse_with_defaults(source).unwrap();
        assert_eq!(docs.len(), 2);
        assert_eq!(docs[0].name, "person");
        assert_eq!(docs[1].name, "company");
    }

    /// Duplicate fact names do not make parsing fail.
    #[test]
    fn parse_allows_duplicate_fact_names() {
        let outcome =
            parse_with_defaults("doc person\nfact name = \"John\"\nfact name = \"Jane\"");
        assert!(
            outcome.is_ok(),
            "Parser should succeed even with duplicate facts"
        );
    }

    /// Duplicate rule names do not make parsing fail.
    #[test]
    fn parse_allows_duplicate_rule_names() {
        let outcome = parse_with_defaults(
            "doc person\nrule is_adult = age >= 18\nrule is_adult = age >= 21",
        );
        assert!(
            outcome.is_ok(),
            "Parser should succeed even with duplicate rules"
        );
    }

    /// Text that does not start a doc is rejected.
    #[test]
    fn parse_rejects_malformed_input() {
        assert!(parse_with_defaults("invalid syntax here").is_err());
    }

    /// Expressions parse regardless of how whitespace is placed around operators.
    #[test]
    fn parse_handles_whitespace_variants_in_expressions() {
        let cases = [
            ("doc test\nrule test = 2+3", "no spaces in arithmetic"),
            ("doc test\nrule test = age>=18", "no spaces in comparison"),
            (
                "doc test\nrule test = age >= 18 and salary>50000",
                "spaces around and keyword",
            ),
            (
                "doc test\nrule test = age >= 18 and salary > 50000",
                "extra spaces",
            ),
            (
                "doc test\nrule test = \n age >= 18 \n and \n salary > 50000",
                "newlines in expression",
            ),
        ];

        for (input, description) in cases {
            let outcome = parse_with_defaults(input);
            assert!(
                outcome.is_ok(),
                "Failed to parse {} ({}): {:?}",
                input,
                description,
                outcome.err()
            );
        }
    }

    /// Each malformed snippet must produce an error.
    #[test]
    fn parse_error_cases_are_rejected() {
        let cases = [
            (
                "doc test\nfact name = \"unclosed string",
                "unclosed string literal",
            ),
            ("doc test\nrule test = 2 + + 3", "double operator"),
            ("doc test\nrule test = (2 + 3", "unclosed parenthesis"),
            ("doc test\nrule test = 2 + 3)", "extra closing paren"),
            ("doc test\nfact doc = 123", "reserved keyword as fact name"),
            (
                "doc test\nrule rule = true",
                "reserved keyword as rule name",
            ),
        ];

        for (input, description) in cases {
            let outcome = parse_with_defaults(input);
            assert!(
                outcome.is_err(),
                "Expected error for {} but got success",
                description
            );
        }
    }

    /// Number-plus-unit literals are accepted as rule expressions.
    #[test]
    fn parse_duration_literals_in_rules() {
        let cases = [
            ("2 years", "years"),
            ("6 months", "months"),
            ("52 weeks", "weeks"),
            ("365 days", "days"),
            ("24 hours", "hours"),
            ("60 minutes", "minutes"),
            ("3600 seconds", "seconds"),
            ("1000 milliseconds", "milliseconds"),
            ("500000 microseconds", "microseconds"),
            ("50 percent", "percent"),
        ];

        for (expr, description) in cases {
            let source = format!("doc test\nrule test = {}", expr);
            let outcome = parse_with_defaults(&source);
            assert!(
                outcome.is_ok(),
                "Failed to parse literal {} ({}): {:?}",
                expr,
                description,
                outcome.err()
            );
        }
    }

    /// `in <unit>` conversions are accepted inside comparisons, with or without parentheses.
    #[test]
    fn parse_comparisons_with_duration_unit_conversions() {
        let cases = [
            (
                "(duration in hours) > 2",
                "duration conversion in comparison with parens",
            ),
            (
                "(meeting_time in minutes) >= 30",
                "duration conversion with gte",
            ),
            (
                "(project_length in days) < 100",
                "duration conversion with lt",
            ),
            (
                "(delay in seconds) == 60",
                "duration conversion with equality",
            ),
            (
                "(1 hours) > (30 minutes)",
                "duration conversions on both sides",
            ),
            (
                "duration in hours > 2",
                "duration conversion without parens",
            ),
            (
                "meeting_time in seconds > 3600",
                "variable duration conversion in comparison",
            ),
            (
                "project_length in days > deadline_days",
                "two variables with duration conversion",
            ),
            (
                "duration in hours >= 1 and duration in hours <= 8",
                "multiple duration comparisons",
            ),
        ];

        for (expr, description) in cases {
            let source = format!("doc test\nrule test = {}", expr);
            let outcome = parse_with_defaults(&source);
            assert!(
                outcome.is_ok(),
                "Failed to parse {} ({}): {:?}",
                expr,
                description,
                outcome.err()
            );
        }
    }

    /// A parse failure carries the caller's attribute and the `<parse-error>` doc name.
    #[test]
    fn parse_error_includes_attribute_and_parse_error_doc_name() {
        let outcome =
            parse_with_defaults("\ndoc test\nfact name = \"Unclosed string\nfact age = 25\n");

        match outcome {
            Ok(_) => panic!("Expected parse error for unclosed string"),
            Err(LemmaError::Parse(details)) => {
                assert_eq!(details.source_location.attribute, ATTRIBUTE);
                assert_eq!(details.source_location.doc_name, "<parse-error>");
            }
            Err(e) => panic!("Expected Parse error, got: {e:?}"),
        }
    }

    /// Garbage after a valid doc header fails with the `Parse` variant.
    #[test]
    fn parse_error_is_returned_for_garbage_input() {
        let outcome = parse_with_defaults("\ndoc test\nthis is not valid lemma syntax @#$%\n");

        assert!(outcome.is_err(), "Should fail on malformed input");
        match outcome {
            Ok(_) => panic!("Expected parse error"),
            Err(LemmaError::Parse { .. }) => {}
            Err(e) => panic!("Expected Parse error, got: {e:?}"),
        }
    }
}
540}