1use alloc::string::{String, ToString};
12use alloc::vec;
13use alloc::vec::Vec;
14
15use nom::{
16 IResult, Parser,
17 branch::alt,
18 bytes::complete::{tag, take_while},
19 character::complete::{char, line_ending, satisfy, space0, space1},
20 combinator::{eof, opt, recognize, value},
21 multi::many0,
22 sequence::{delimited, preceded, terminated},
23};
24
25use crate::ast::{
26 Atom, Body, CloseKind, Conn, ListOp, Literal, Located, Program, Quant, Span, Statement,
27};
28use crate::diag::{Diagnostic, Diagnostics};
29use crate::keywords::{is_reserved, is_top_level, keyword_in, kw};
30
31#[derive(Debug, Clone)]
39struct Problem<'a> {
40 input: Span<'a>,
41 message: String,
42}
43
44impl<'a> nom::error::ParseError<Span<'a>> for Problem<'a> {
45 fn from_error_kind(input: Span<'a>, _: nom::error::ErrorKind) -> Self {
46 Problem {
47 input,
48 message: String::from("unexpected token"),
49 }
50 }
51 fn append(_: Span<'a>, _: nom::error::ErrorKind, other: Self) -> Self {
52 other
53 }
54}
55
56type PResult<'a, T> = IResult<Span<'a>, T, Problem<'a>>;
58
59fn promote<'a, T>(r: PResult<'a, T>, at: Span<'a>, msg: &str) -> PResult<'a, T> {
62 match r {
63 Err(nom::Err::Error(_)) => Err(nom::Err::Failure(Problem {
64 input: at,
65 message: String::from(msg),
66 })),
67 other => other,
68 }
69}
70
71fn perr<'a, T>(input: Span<'a>) -> PResult<'a, T> {
73 Err(nom::Err::Error(Problem {
74 input,
75 message: String::from("unexpected token"),
76 }))
77}
78
79fn is_ident_char(c: char) -> bool {
85 c.is_alphanumeric() || c == '_'
86}
87
88fn raw_identifier<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
92 recognize((satisfy(|c| c.is_alphabetic()), take_while(is_ident_char))).parse(input)
93}
94
95fn identifier<'a>(input: Span<'a>) -> PResult<'a, Located<'a, &'a str>> {
97 let start = input;
98 let (rest, sp) = raw_identifier(input)?;
99 if is_reserved(sp.fragment()) {
100 return perr(start);
101 }
102 Ok((
103 rest,
104 Located {
105 data: *sp.fragment(),
106 span: start,
107 },
108 ))
109}
110
111fn comment<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
113 recognize((tag("//"), take_while(|c| c != '\n' && c != '\r'))).parse(input)
114}
115
116fn eol<'a>(input: Span<'a>) -> PResult<'a, ()> {
119 value((), (space0, opt(comment), alt((line_ending, eof)))).parse(input)
120}
121
122fn noise_line<'a>(input: Span<'a>) -> PResult<'a, ()> {
124 value((), (space0, opt(comment), line_ending)).parse(input)
125}
126
127fn skip_noise<'a>(input: Span<'a>) -> PResult<'a, ()> {
129 value((), many0(noise_line)).parse(input)
130}
131
132fn atom<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
140 let start = input;
141 let (input, domain) = opt(terminated(identifier, char('.'))).parse(input)?;
142 let (input, subject) = identifier(input)?;
143 let (input, _) = space1(input)?;
144 let (input, predicate) = identifier(input)?;
145 let (input, object) = opt(preceded(space1, identifier)).parse(input)?;
146 Ok((
147 input,
148 Located {
149 data: Atom {
150 domain: domain.map(|d| d.data),
151 subject: subject.data,
152 predicate: predicate.data,
153 object: object.map(|o| o.data),
154 },
155 span: start,
156 },
157 ))
158}
159
160fn literal<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Literal<'a>>> {
162 let start = input;
163 let (input, neg) = opt(terminated(tag(kw::NOT), space1)).parse(input)?;
164 let (input, a) = atom(input)?;
165 Ok((
166 input,
167 Located {
168 data: Literal {
169 negated: neg.is_some(),
170 atom: a.data,
171 },
172 span: start,
173 },
174 ))
175}
176
177fn atom_line<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
179 let (input, _) = space0(input)?;
180 let (input, a) = atom(input)?;
181 let (input, _) = eol(input)?;
182 Ok((input, a))
183}
184
185fn element_line<'a>(input: Span<'a>) -> PResult<'a, Located<'a, &'a str>> {
189 let (input, _) = space0(input)?;
190 let (input, id) = identifier(input)?;
191 let (input, _) = eol(input)?;
192 Ok((input, id))
193}
194
195fn list_op<'a>(input: Span<'a>) -> PResult<'a, ListOp> {
199 alt((
200 value(ListOp::Exclusive, tag(kw::EXCLUSIVE)),
201 value(ListOp::Forbids, tag(kw::FORBIDS)),
202 value(ListOp::OneOf, tag(kw::ONEOF)),
203 value(ListOp::AtLeast, tag(kw::ATLEAST)),
204 ))
205 .parse(input)
206}
207
208const LIST_NEEDS_TWO_ATOMS: &str = "a list premise needs at least two atoms";
219
220fn list_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
221 let (input, _) = space0(input)?;
222 let (input, op) = list_op(input)?;
224 let (input, _) = promote(
226 eol(input),
227 input,
228 "expected a newline after the list operator",
229 )?;
230 let at = input;
231 let (input, first) = promote(atom_line(input), at, LIST_NEEDS_TWO_ATOMS)?;
232 let at = input;
233 let (input, second) = promote(atom_line(input), at, LIST_NEEDS_TWO_ATOMS)?;
234 let (input, rest) = many0(atom_line).parse(input)?;
235
236 let mut atoms = vec![first, second];
237 atoms.extend(rest);
238 Ok((input, Body::List { op, atoms }))
239}
240
241fn cont_line<'a>(input: Span<'a>) -> PResult<'a, (Conn, Located<'a, Literal<'a>>)> {
245 let (input, _) = space0(input)?;
246 let (input, conn) =
248 alt((value(Conn::And, tag(kw::AND)), value(Conn::Or, tag(kw::OR)))).parse(input)?;
249 let (input, _) = space1(input)?;
250 let at = input;
251 let (input, lit) = promote(
252 literal(input),
253 at,
254 "AND/OR expects a literal: [NOT] <Subject> <predicate> [<object>]",
255 )?;
256 let (input, _) = promote(
257 eol(input),
258 input,
259 "unexpected text after the AND/OR literal",
260 )?;
261 Ok((input, (conn, lit)))
262}
263
264fn group_conn<'a>(conts: &[(Conn, Located<'a, Literal<'a>>)]) -> Result<Conn, Span<'a>> {
267 let mut seen: Option<Conn> = None;
268 for (conn, lit) in conts {
269 match seen {
270 None => seen = Some(*conn),
271 Some(s) if s != *conn => return Err(lit.span),
272 _ => {}
273 }
274 }
275 Ok(seen.unwrap_or(Conn::And))
276}
277
278fn fail_at<'a, T>(at: Span<'a>, msg: &str) -> PResult<'a, T> {
280 Err(nom::Err::Failure(Problem {
281 input: at,
282 message: String::from(msg),
283 }))
284}
285
286fn impl_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
293 let (input, _) = space0(input)?;
294 let (input, _) = (tag(kw::WHEN), space1).parse(input)?;
296 let at = input;
298 let (input, when) = promote(
299 literal(input),
300 at,
301 "WHEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
302 )?;
303 let (input, _) = promote(eol(input), input, "unexpected text after the WHEN literal")?;
304 let (input, ante_rest) = many0(cont_line).parse(input)?;
305 let ante_conn = match group_conn(&ante_rest) {
306 Ok(c) => c,
307 Err(span) => {
308 return fail_at(
309 span,
310 "don't mix AND and OR in one WHEN group — split it into separate premises",
311 );
312 }
313 };
314
315 let (input, _) = space0(input)?;
316 let at = input;
317 let (input, _) = promote(
318 tag(kw::THEN).parse(input),
319 at,
320 "expected THEN to complete the WHEN ... THEN implication",
321 )?;
322 let at = input;
323 let (input, then) = promote(
324 preceded(space1, literal).parse(input),
325 at,
326 "THEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
327 )?;
328 let (input, _) = promote(eol(input), input, "unexpected text after the THEN literal")?;
329 let (input, cons_rest) = many0(cont_line).parse(input)?;
330 let cons_conn = match group_conn(&cons_rest) {
331 Ok(c) => c,
332 Err(span) => {
333 return fail_at(
334 span,
335 "don't mix AND and OR in one THEN group — split it into separate premises",
336 );
337 }
338 };
339
340 let mut antecedent = vec![when];
341 antecedent.extend(ante_rest.into_iter().map(|(_, l)| l));
342 let mut consequent = vec![then];
343 consequent.extend(cons_rest.into_iter().map(|(_, l)| l));
344 Ok((
345 input,
346 Body::Impl {
347 antecedent,
348 ante_conn,
349 consequent,
350 cons_conn,
351 },
352 ))
353}
354
355fn stmt_import<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
360 let (input, _) = (tag(kw::IMPORT), space1).parse(input)?;
361 let start = input;
362 let (input, path) = promote(
363 delimited(char('"'), take_while(|c| c != '"' && c != '\n'), char('"')).parse(input),
364 start,
365 "IMPORT expects a quoted path, e.g. IMPORT \"physics.vrf\"",
366 )?;
367 let (input, alias) = opt(preceded((space1, tag(kw::AS), space1), identifier)).parse(input)?;
369 let (input, _) = promote(
370 eol(input),
371 input,
372 "unexpected text after the IMPORT path (did you mean AS <alias>?)",
373 )?;
374 Ok((
375 input,
376 Statement::Import {
377 path: Located {
378 data: *path.fragment(),
379 span: start,
380 },
381 alias,
382 },
383 ))
384}
385
386fn stmt_domain<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
388 let (input, _) = (tag(kw::DOMAIN), space1).parse(input)?;
389 let at = input;
390 let (input, name) = promote(
391 identifier(input),
392 at,
393 "DOMAIN expects a name (a lowercase identifier), e.g. DOMAIN physics",
394 )?;
395 let (input, _) = promote(eol(input), input, "unexpected text after the DOMAIN name")?;
396 Ok((input, Statement::Domain(name)))
397}
398
399fn stmt_fact<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
401 let (input, _) = (tag(kw::FACT), space1).parse(input)?;
402 let at = input;
403 let (input, a) = promote(
404 atom(input),
405 at,
406 "FACT expects an atom: <Subject> <predicate> [<object>]",
407 )?;
408 let (input, _) = promote(eol(input), input, "unexpected text after the FACT atom")?;
409 Ok((input, Statement::Fact(a)))
410}
411
412fn stmt_assume<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
415 let (input, _) = (tag(kw::ASSUME), space1).parse(input)?;
416 let at = input;
417 let (input, lit) = promote(
418 literal(input),
419 at,
420 "ASSUME expects an atom: [NOT] <Subject> <predicate> [<object>]",
421 )?;
422 let (input, _) = promote(eol(input), input, "unexpected text after the ASSUME atom")?;
423 Ok((input, Statement::Assume(lit)))
424}
425
426fn stmt_negation<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
429 let (input, _) = (tag(kw::NOT), space1).parse(input)?;
430 let at = input;
431 let (input, a) = promote(
432 atom(input),
433 at,
434 "NOT expects an atom: <Subject> <predicate> [<object>]",
435 )?;
436 let (input, _) = promote(eol(input), input, "unexpected text after the NOT atom")?;
437 Ok((input, Statement::Negation(a)))
438}
439
440fn stmt_check<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
442 let (input, _) = tag(kw::CHECK).parse(input)?;
443 let (input, subject) = opt(preceded(space1, identifier)).parse(input)?;
444 let (input, bidir) = opt(preceded(space1, tag(kw::BIDIRECTIONAL))).parse(input)?;
445 let (input, _) = eol(input)?;
446 Ok((
447 input,
448 Statement::Check {
449 subject,
450 bidirectional: bidir.is_some(),
451 },
452 ))
453}
454
455fn stmt_set<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
458 let (input, _) = (tag(kw::SET), space1).parse(input)?;
459 let at = input;
460 let (input, name) = promote(
461 identifier(input),
462 at,
463 "SET expects a name (a lowercase identifier), e.g. SET tasks",
464 )?;
465 let (input, _) = promote(eol(input), input, "unexpected text after the SET name")?;
466 let at = input;
467 let (input, first) = promote(
468 element_line(input),
469 at,
470 "a SET needs at least one element — one identifier per line",
471 )?;
472 let (input, rest) = many0(element_line).parse(input)?;
473 let mut elements = vec![first];
474 elements.extend(rest);
475 Ok((input, Statement::Set { name, elements }))
476}
477
478fn stmt_close<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
480 let (input, _) = (tag(kw::CLOSE), space1).parse(input)?;
481 let at = input;
482 let (input, relation) = promote(
483 identifier(input),
484 at,
485 "CLOSE expects a relation name, e.g. CLOSE depends_on TRANSITIVE",
486 )?;
487 let (input, _) = promote(
488 (space1, tag(kw::TRANSITIVE)).parse(input),
489 input,
490 "CLOSE expects a closure kind: CLOSE <relation> TRANSITIVE",
491 )?;
492 let (input, _) = promote(
493 eol(input),
494 input,
495 "unexpected text after 'CLOSE … TRANSITIVE'",
496 )?;
497 Ok((
498 input,
499 Statement::Close {
500 relation,
501 kind: CloseKind::Transitive,
502 },
503 ))
504}
505
506fn for_each<'a>(input: Span<'a>) -> PResult<'a, Quant<'a>> {
513 let (input, _) = tag(kw::FOR).parse(input)?;
515 let at = input;
517 let (input, _) = promote(
518 (space1, tag(kw::EACH), space1).parse(input),
519 at,
520 "FOR must be followed by EACH: FOR EACH <binder> IN <set> (or FOR EACH <a> <relation> <b>)",
521 )?;
522 let at = input;
523 let (input, first) = promote(
524 identifier(input),
525 at,
526 "expected a binder name after FOR EACH",
527 )?;
528 let (input, _) = promote(
529 space1(input),
530 input,
531 "expected `IN <set>` or a relation `<rel> <binder>` after the FOR EACH binder",
532 )?;
533 let after_in: PResult<'a, (Span<'a>, Span<'a>)> = (tag(kw::IN), space1).parse(input);
535 if let Ok((rest, _)) = after_in {
536 let at = rest;
537 let (rest, set) = promote(identifier(rest), at, "expected a set name after IN")?;
538 return Ok((rest, Quant::InSet { binder: first, set }));
539 }
540 let at = input;
541 let (input, predicate) = promote(
542 identifier(input),
543 at,
544 "expected a relation name (FOR EACH <a> <relation> <b>)",
545 )?;
546 let (input, _) = promote(
547 space1(input),
548 input,
549 "expected the second binder after the relation (FOR EACH <a> <relation> <b>)",
550 )?;
551 let at = input;
552 let (input, right) = promote(
553 identifier(input),
554 at,
555 "expected the second binder (FOR EACH <a> <relation> <b>)",
556 )?;
557 Ok((
558 input,
559 Quant::Relation {
560 left: first,
561 predicate,
562 right,
563 },
564 ))
565}
566
567fn named_header<'a>(
573 input: Span<'a>,
574 name_msg: &'static str,
575 colon_msg: &'static str,
576 tail_msg: &'static str,
577) -> PResult<'a, (Located<'a, &'a str>, Option<Quant<'a>>)> {
578 let at = input;
579 let (input, name) = promote(identifier(input), at, name_msg)?;
580 let (input, quant) = opt(preceded(space1, for_each)).parse(input)?;
581 let (input, _) = space0(input)?;
582 let (input, _) = promote(char(':').parse(input), input, colon_msg)?;
583 let (input, _) = promote(eol(input), input, tail_msg)?;
584 Ok((input, (name, quant)))
585}
586
587fn stmt_premise<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
588 let (input, _) = (tag(kw::PREMISE), space1).parse(input)?;
589 let (input, (name, quant)) = named_header(
591 input,
592 "expected a premise name (a lowercase identifier)",
593 "expected ':' after the premise name",
594 "unexpected text after 'PREMISE <name>:'",
595 )?;
596 let at = input;
597 let (input, body) = promote(
598 alt((list_body, impl_body)).parse(input),
599 at,
600 "a premise body must be a list (EXCLUSIVE/FORBIDS/ONEOF/ATLEAST) or WHEN ... THEN",
601 )?;
602 Ok((input, Statement::Premise { name, quant, body }))
603}
604
605fn stmt_rule<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
608 let (input, _) = (tag(kw::RULE), space1).parse(input)?;
609 let (input, (name, quant)) = named_header(
610 input,
611 "expected a rule name (a lowercase identifier)",
612 "expected ':' after the rule name",
613 "unexpected text after 'RULE <name>:'",
614 )?;
615 let at = input;
616 let (input, body) = promote(impl_body(input), at, "a rule body must be WHEN ... THEN")?;
617 Ok((input, Statement::Rule { name, quant, body }))
618}
619
620fn statement<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
624 let (input, _) = space0(input)?;
628 alt((
629 stmt_domain,
630 stmt_import,
631 stmt_set,
632 stmt_close,
633 stmt_fact,
634 stmt_assume,
635 stmt_premise,
636 stmt_rule,
637 stmt_check,
638 stmt_negation,
639 ))
640 .parse(input)
641}
642
643fn not_a_statement() -> String {
649 alloc::format!(
650 "expected a statement — a line must start with {}",
651 crate::keywords::top_level_menu()
652 )
653}
654
655pub fn parse(src: &str) -> Result<Program<'_>, Diagnostics> {
663 let mut input = Span::new(src);
664 let mut statements = Vec::new();
665 let mut errors: Vec<Diagnostic> = Vec::new();
666
667 loop {
668 if let Ok((rest, _)) = skip_noise(input) {
669 input = rest;
670 }
671 if at_end(input.fragment()) {
674 break;
675 }
676 match statement(input) {
677 Ok((rest, stmt)) => {
678 statements.push(stmt);
679 input = rest;
680 }
681 Err(nom::Err::Failure(p)) => {
683 errors.push(make_diag(src, p.input, p.message, false));
684 input = resync(p.input);
685 }
686 Err(nom::Err::Error(p)) => {
688 errors.push(make_diag(src, p.input, not_a_statement(), true));
689 input = resync(p.input);
690 }
691 Err(nom::Err::Incomplete(_)) => break,
693 }
694 }
695
696 if errors.is_empty() {
697 Ok(Program { statements })
698 } else {
699 Err(Diagnostics { file: None, errors })
700 }
701}
702
703fn at_end(frag: &str) -> bool {
707 let t = frag.trim_start();
708 t.is_empty() || (t.starts_with("//") && !t.contains('\n'))
709}
710
711fn make_diag(src: &str, at: Span<'_>, message: String, general: bool) -> Diagnostic {
715 let line = at.location_line() as usize;
716 let col = at.get_column();
717 let line_text = src.lines().nth(line.saturating_sub(1)).unwrap_or("");
718 let keyword = if general { None } else { keyword_in(&message) };
723 Diagnostic {
724 line,
725 col,
726 width: caret_width(line_text, col),
727 message,
728 keyword,
729 general,
730 line_text: line_text.to_string(),
731 }
732}
733
734fn caret_width(line_text: &str, col: usize) -> usize {
737 let start = col.saturating_sub(1);
738 let trimmed_len = line_text.trim_end().chars().count();
739 trimmed_len.saturating_sub(start).max(1)
740}
741
742fn resync(at: Span<'_>) -> Span<'_> {
746 let mut input = consume_line(at);
747 loop {
748 if input.fragment().is_empty() || starts_top_level(input) {
749 return input;
750 }
751 input = consume_line(input);
752 }
753}
754
755fn consume_line(input: Span<'_>) -> Span<'_> {
758 let parsed: PResult<'_, Span<'_>> =
759 recognize((take_while(|c| c != '\n' && c != '\r'), opt(line_ending))).parse(input);
760 parsed.map(|(rest, _)| rest).unwrap_or(input)
761}
762
763fn starts_top_level(input: Span<'_>) -> bool {
765 let after = match space0::<_, Problem<'_>>(input) {
766 Ok((rest, _)) => rest,
767 Err(_) => return false,
768 };
769 let word: String = after
770 .fragment()
771 .chars()
772 .take_while(|c| c.is_ascii_uppercase())
773 .collect();
774 is_top_level(&word)
775}