1use alloc::string::{String, ToString};
12use alloc::vec;
13use alloc::vec::Vec;
14
15use nom::{
16 IResult, Parser,
17 branch::alt,
18 bytes::complete::{tag, take_while},
19 character::complete::{char, line_ending, satisfy, space0, space1},
20 combinator::{eof, opt, recognize, value},
21 multi::many0,
22 sequence::{delimited, preceded, terminated},
23};
24
25use crate::ast::{Atom, Body, Conn, ListOp, Literal, Located, Program, Span, Statement};
26use crate::diag::{Diagnostic, Diagnostics};
27use crate::keywords::{is_reserved, is_top_level, keyword_in, kw};
28
29#[derive(Debug, Clone)]
37struct Problem<'a> {
38 input: Span<'a>,
39 message: String,
40}
41
42impl<'a> nom::error::ParseError<Span<'a>> for Problem<'a> {
43 fn from_error_kind(input: Span<'a>, _: nom::error::ErrorKind) -> Self {
44 Problem {
45 input,
46 message: String::from("unexpected token"),
47 }
48 }
49 fn append(_: Span<'a>, _: nom::error::ErrorKind, other: Self) -> Self {
50 other
51 }
52}
53
54type PResult<'a, T> = IResult<Span<'a>, T, Problem<'a>>;
56
57fn promote<'a, T>(r: PResult<'a, T>, at: Span<'a>, msg: &str) -> PResult<'a, T> {
60 match r {
61 Err(nom::Err::Error(_)) => Err(nom::Err::Failure(Problem {
62 input: at,
63 message: String::from(msg),
64 })),
65 other => other,
66 }
67}
68
69fn perr<'a, T>(input: Span<'a>) -> PResult<'a, T> {
71 Err(nom::Err::Error(Problem {
72 input,
73 message: String::from("unexpected token"),
74 }))
75}
76
77fn is_ident_char(c: char) -> bool {
83 c.is_alphanumeric() || c == '_'
84}
85
86fn raw_identifier<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
90 recognize((satisfy(|c| c.is_alphabetic()), take_while(is_ident_char))).parse(input)
91}
92
93fn identifier<'a>(input: Span<'a>) -> PResult<'a, Located<'a, &'a str>> {
95 let start = input;
96 let (rest, sp) = raw_identifier(input)?;
97 if is_reserved(sp.fragment()) {
98 return perr(start);
99 }
100 Ok((
101 rest,
102 Located {
103 data: *sp.fragment(),
104 span: start,
105 },
106 ))
107}
108
109fn comment<'a>(input: Span<'a>) -> PResult<'a, Span<'a>> {
111 recognize((tag("//"), take_while(|c| c != '\n' && c != '\r'))).parse(input)
112}
113
114fn eol<'a>(input: Span<'a>) -> PResult<'a, ()> {
117 value((), (space0, opt(comment), alt((line_ending, eof)))).parse(input)
118}
119
120fn noise_line<'a>(input: Span<'a>) -> PResult<'a, ()> {
122 value((), (space0, opt(comment), line_ending)).parse(input)
123}
124
125fn skip_noise<'a>(input: Span<'a>) -> PResult<'a, ()> {
127 value((), many0(noise_line)).parse(input)
128}
129
130fn atom<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
138 let start = input;
139 let (input, domain) = opt(terminated(identifier, char('.'))).parse(input)?;
140 let (input, subject) = identifier(input)?;
141 let (input, _) = space1(input)?;
142 let (input, predicate) = identifier(input)?;
143 let (input, object) = opt(preceded(space1, identifier)).parse(input)?;
144 Ok((
145 input,
146 Located {
147 data: Atom {
148 domain: domain.map(|d| d.data),
149 subject: subject.data,
150 predicate: predicate.data,
151 object: object.map(|o| o.data),
152 },
153 span: start,
154 },
155 ))
156}
157
158fn literal<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Literal<'a>>> {
160 let start = input;
161 let (input, neg) = opt(terminated(tag(kw::NOT), space1)).parse(input)?;
162 let (input, a) = atom(input)?;
163 Ok((
164 input,
165 Located {
166 data: Literal {
167 negated: neg.is_some(),
168 atom: a.data,
169 },
170 span: start,
171 },
172 ))
173}
174
175fn atom_line<'a>(input: Span<'a>) -> PResult<'a, Located<'a, Atom<'a>>> {
177 let (input, _) = space0(input)?;
178 let (input, a) = atom(input)?;
179 let (input, _) = eol(input)?;
180 Ok((input, a))
181}
182
183fn list_op<'a>(input: Span<'a>) -> PResult<'a, ListOp> {
187 alt((
188 value(ListOp::Exclusive, tag(kw::EXCLUSIVE)),
189 value(ListOp::Forbids, tag(kw::FORBIDS)),
190 value(ListOp::OneOf, tag(kw::ONEOF)),
191 value(ListOp::AtLeast, tag(kw::ATLEAST)),
192 ))
193 .parse(input)
194}
195
196fn list_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
205 let (input, _) = space0(input)?;
206 let (input, op) = list_op(input)?;
208 let (input, _) = promote(
210 eol(input),
211 input,
212 "expected a newline after the list operator",
213 )?;
214 let at = input;
215 let (input, first) = promote(
216 atom_line(input),
217 at,
218 "a list premise needs at least two atoms",
219 )?;
220 let at = input;
221 let (input, second) = promote(
222 atom_line(input),
223 at,
224 "a list premise needs at least two atoms",
225 )?;
226 let (input, rest) = many0(atom_line).parse(input)?;
227
228 let mut atoms = vec![first, second];
229 atoms.extend(rest);
230 Ok((input, Body::List { op, atoms }))
231}
232
233fn cont_line<'a>(input: Span<'a>) -> PResult<'a, (Conn, Located<'a, Literal<'a>>)> {
237 let (input, _) = space0(input)?;
238 let (input, conn) =
240 alt((value(Conn::And, tag(kw::AND)), value(Conn::Or, tag(kw::OR)))).parse(input)?;
241 let (input, _) = space1(input)?;
242 let at = input;
243 let (input, lit) = promote(
244 literal(input),
245 at,
246 "AND/OR expects a literal: [NOT] <Subject> <predicate> [<object>]",
247 )?;
248 let (input, _) = promote(
249 eol(input),
250 input,
251 "unexpected text after the AND/OR literal",
252 )?;
253 Ok((input, (conn, lit)))
254}
255
256fn group_conn<'a>(conts: &[(Conn, Located<'a, Literal<'a>>)]) -> Result<Conn, Span<'a>> {
259 let mut seen: Option<Conn> = None;
260 for (conn, lit) in conts {
261 match seen {
262 None => seen = Some(*conn),
263 Some(s) if s != *conn => return Err(lit.span),
264 _ => {}
265 }
266 }
267 Ok(seen.unwrap_or(Conn::And))
268}
269
270fn fail_at<'a, T>(at: Span<'a>, msg: &str) -> PResult<'a, T> {
272 Err(nom::Err::Failure(Problem {
273 input: at,
274 message: String::from(msg),
275 }))
276}
277
278fn impl_body<'a>(input: Span<'a>) -> PResult<'a, Body<'a>> {
285 let (input, _) = space0(input)?;
286 let (input, _) = (tag(kw::WHEN), space1).parse(input)?;
288 let at = input;
290 let (input, when) = promote(
291 literal(input),
292 at,
293 "WHEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
294 )?;
295 let (input, _) = promote(eol(input), input, "unexpected text after the WHEN literal")?;
296 let (input, ante_rest) = many0(cont_line).parse(input)?;
297 let ante_conn = match group_conn(&ante_rest) {
298 Ok(c) => c,
299 Err(span) => {
300 return fail_at(
301 span,
302 "don't mix AND and OR in one WHEN group — split it into separate premises",
303 );
304 }
305 };
306
307 let (input, _) = space0(input)?;
308 let at = input;
309 let (input, _) = promote(
310 tag(kw::THEN).parse(input),
311 at,
312 "expected THEN to complete the WHEN ... THEN implication",
313 )?;
314 let at = input;
315 let (input, then) = promote(
316 preceded(space1, literal).parse(input),
317 at,
318 "THEN expects a literal: [NOT] <Subject> <predicate> [<object>]",
319 )?;
320 let (input, _) = promote(eol(input), input, "unexpected text after the THEN literal")?;
321 let (input, cons_rest) = many0(cont_line).parse(input)?;
322 let cons_conn = match group_conn(&cons_rest) {
323 Ok(c) => c,
324 Err(span) => {
325 return fail_at(
326 span,
327 "don't mix AND and OR in one THEN group — split it into separate premises",
328 );
329 }
330 };
331
332 let mut antecedent = vec![when];
333 antecedent.extend(ante_rest.into_iter().map(|(_, l)| l));
334 let mut consequent = vec![then];
335 consequent.extend(cons_rest.into_iter().map(|(_, l)| l));
336 Ok((
337 input,
338 Body::Impl {
339 antecedent,
340 ante_conn,
341 consequent,
342 cons_conn,
343 },
344 ))
345}
346
347fn stmt_import<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
352 let (input, _) = (tag(kw::IMPORT), space1).parse(input)?;
353 let start = input;
354 let (input, path) = promote(
355 delimited(char('"'), take_while(|c| c != '"' && c != '\n'), char('"')).parse(input),
356 start,
357 "IMPORT expects a quoted path, e.g. IMPORT \"physics.vrf\"",
358 )?;
359 let (input, alias) = opt(preceded((space1, tag(kw::AS), space1), identifier)).parse(input)?;
361 let (input, _) = promote(
362 eol(input),
363 input,
364 "unexpected text after the IMPORT path (did you mean AS <alias>?)",
365 )?;
366 Ok((
367 input,
368 Statement::Import {
369 path: Located {
370 data: *path.fragment(),
371 span: start,
372 },
373 alias,
374 },
375 ))
376}
377
378fn stmt_domain<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
380 let (input, _) = (tag(kw::DOMAIN), space1).parse(input)?;
381 let at = input;
382 let (input, name) = promote(
383 identifier(input),
384 at,
385 "DOMAIN expects a name (a lowercase identifier), e.g. DOMAIN physics",
386 )?;
387 let (input, _) = promote(eol(input), input, "unexpected text after the DOMAIN name")?;
388 Ok((input, Statement::Domain(name)))
389}
390
391fn stmt_fact<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
393 let (input, _) = (tag(kw::FACT), space1).parse(input)?;
394 let at = input;
395 let (input, a) = promote(
396 atom(input),
397 at,
398 "FACT expects an atom: <Subject> <predicate> [<object>]",
399 )?;
400 let (input, _) = promote(eol(input), input, "unexpected text after the FACT atom")?;
401 Ok((input, Statement::Fact(a)))
402}
403
404fn stmt_assume<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
407 let (input, _) = (tag(kw::ASSUME), space1).parse(input)?;
408 let at = input;
409 let (input, lit) = promote(
410 literal(input),
411 at,
412 "ASSUME expects an atom: [NOT] <Subject> <predicate> [<object>]",
413 )?;
414 let (input, _) = promote(eol(input), input, "unexpected text after the ASSUME atom")?;
415 Ok((input, Statement::Assume(lit)))
416}
417
418fn stmt_negation<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
421 let (input, _) = (tag(kw::NOT), space1).parse(input)?;
422 let at = input;
423 let (input, a) = promote(
424 atom(input),
425 at,
426 "NOT expects an atom: <Subject> <predicate> [<object>]",
427 )?;
428 let (input, _) = promote(eol(input), input, "unexpected text after the NOT atom")?;
429 Ok((input, Statement::Negation(a)))
430}
431
432fn stmt_check<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
434 let (input, _) = tag(kw::CHECK).parse(input)?;
435 let (input, subject) = opt(preceded(space1, identifier)).parse(input)?;
436 let (input, bidir) = opt(preceded(space1, tag(kw::BIDIRECTIONAL))).parse(input)?;
437 let (input, _) = eol(input)?;
438 Ok((
439 input,
440 Statement::Check {
441 subject,
442 bidirectional: bidir.is_some(),
443 },
444 ))
445}
446
447fn stmt_premise<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
449 let (input, _) = (tag(kw::PREMISE), space1).parse(input)?;
450 let at = input;
452 let (input, name) = promote(
453 identifier(input),
454 at,
455 "expected a premise name (a lowercase identifier)",
456 )?;
457 let (input, _) = space0(input)?;
458 let (input, _) = promote(
459 char(':').parse(input),
460 input,
461 "expected ':' after the premise name",
462 )?;
463 let (input, _) = promote(eol(input), input, "unexpected text after 'PREMISE <name>:'")?;
464 let at = input;
465 let (input, body) = promote(
466 alt((list_body, impl_body)).parse(input),
467 at,
468 "a premise body must be a list (EXCLUSIVE/FORBIDS/ONEOF/ATLEAST) or WHEN ... THEN",
469 )?;
470 Ok((input, Statement::Premise { name, body }))
471}
472
473fn stmt_rule<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
475 let (input, _) = (tag(kw::RULE), space1).parse(input)?;
476 let at = input;
477 let (input, name) = promote(
478 identifier(input),
479 at,
480 "expected a rule name (a lowercase identifier)",
481 )?;
482 let (input, _) = space0(input)?;
483 let (input, _) = promote(
484 char(':').parse(input),
485 input,
486 "expected ':' after the rule name",
487 )?;
488 let (input, _) = promote(eol(input), input, "unexpected text after 'RULE <name>:'")?;
489 let at = input;
490 let (input, body) = promote(impl_body(input), at, "a rule body must be WHEN ... THEN")?;
491 Ok((input, Statement::Rule { name, body }))
492}
493
494fn statement<'a>(input: Span<'a>) -> PResult<'a, Statement<'a>> {
498 let (input, _) = space0(input)?;
502 alt((
503 stmt_domain,
504 stmt_import,
505 stmt_fact,
506 stmt_assume,
507 stmt_premise,
508 stmt_rule,
509 stmt_check,
510 stmt_negation,
511 ))
512 .parse(input)
513}
514
515const NOT_A_STATEMENT: &str = "expected a statement — a line must start with DOMAIN, FACT, NOT, ASSUME, PREMISE, RULE, CHECK, or IMPORT";
519
520pub fn parse(src: &str) -> Result<Program<'_>, Diagnostics> {
528 let mut input = Span::new(src);
529 let mut statements = Vec::new();
530 let mut errors: Vec<Diagnostic> = Vec::new();
531
532 loop {
533 if let Ok((rest, _)) = skip_noise(input) {
534 input = rest;
535 }
536 if at_end(input.fragment()) {
539 break;
540 }
541 match statement(input) {
542 Ok((rest, stmt)) => {
543 statements.push(stmt);
544 input = rest;
545 }
546 Err(nom::Err::Failure(p)) => {
548 errors.push(make_diag(src, p.input, p.message, false));
549 input = resync(p.input);
550 }
551 Err(nom::Err::Error(p)) => {
553 errors.push(make_diag(src, p.input, String::from(NOT_A_STATEMENT), true));
554 input = resync(p.input);
555 }
556 Err(nom::Err::Incomplete(_)) => break,
558 }
559 }
560
561 if errors.is_empty() {
562 Ok(Program { statements })
563 } else {
564 Err(Diagnostics { file: None, errors })
565 }
566}
567
568fn at_end(frag: &str) -> bool {
572 let t = frag.trim_start();
573 t.is_empty() || (t.starts_with("//") && !t.contains('\n'))
574}
575
576fn make_diag(src: &str, at: Span<'_>, message: String, general: bool) -> Diagnostic {
580 let line = at.location_line() as usize;
581 let col = at.get_column();
582 let line_text = src.lines().nth(line.saturating_sub(1)).unwrap_or("");
583 let keyword = if general { None } else { keyword_in(&message) };
588 Diagnostic {
589 line,
590 col,
591 width: caret_width(line_text, col),
592 message,
593 keyword,
594 general,
595 line_text: line_text.to_string(),
596 }
597}
598
599fn caret_width(line_text: &str, col: usize) -> usize {
602 let start = col.saturating_sub(1);
603 let trimmed_len = line_text.trim_end().chars().count();
604 trimmed_len.saturating_sub(start).max(1)
605}
606
607fn resync(at: Span<'_>) -> Span<'_> {
611 let mut input = consume_line(at);
612 loop {
613 if input.fragment().is_empty() || starts_top_level(input) {
614 return input;
615 }
616 input = consume_line(input);
617 }
618}
619
620fn consume_line(input: Span<'_>) -> Span<'_> {
623 let parsed: PResult<'_, Span<'_>> =
624 recognize((take_while(|c| c != '\n' && c != '\r'), opt(line_ending))).parse(input);
625 parsed.map(|(rest, _)| rest).unwrap_or(input)
626}
627
628fn starts_top_level(input: Span<'_>) -> bool {
630 let after = match space0::<_, Problem<'_>>(input) {
631 Ok((rest, _)) => rest,
632 Err(_) => return false,
633 };
634 let word: String = after
635 .fragment()
636 .chars()
637 .take_while(|c| c.is_ascii_uppercase())
638 .collect();
639 is_top_level(&word)
640}