1mod augmented;
2mod bnf;
3
4mod nom_xt;
5
6#[cfg(feature = "ABNF")]
7pub use augmented::ABNF;
8pub use bnf::BNF;
9
10use crate::expression::Expression;
11use crate::grammar::Grammar;
12use crate::production::Production;
13use crate::term::Term;
14
15use nom::{
16 IResult, Parser,
17 branch::alt,
18 bytes::complete::{tag, take_till, take_until},
19 character::complete::{self, satisfy},
20 combinator::{all_consuming, eof, not, opt, peek, recognize},
21 multi::many1,
22 sequence::{delimited, preceded, terminated},
23};
24use nom_xt::xt_list_with_separator;
25
26#[derive(Clone, Debug, PartialEq)]
31pub(crate) enum ParsedTerm {
32 Simple(Term),
34 Group(Vec<ParsedExpression>),
36 Optional(Vec<ParsedExpression>),
38}
39
40#[derive(Clone, Debug, PartialEq)]
42pub(crate) enum ParsedExpression {
43 Terms(Vec<ParsedTerm>),
45}
46
47#[derive(Clone, Debug, PartialEq)]
49pub(crate) enum ParsedProduction {
50 Complex {
52 lhs: String,
53 rhs: Vec<ParsedExpression>,
54 },
55}
56
57#[derive(Clone, Debug, Default, PartialEq)]
59pub(crate) struct ParsedGrammar {
60 pub(crate) productions: Vec<ParsedProduction>,
61}
62
63impl ParsedGrammar {
64 pub(crate) const fn new(productions: Vec<ParsedProduction>) -> Self {
65 Self { productions }
66 }
67}
68
69pub trait Format {
70 fn nonterminal_delimiter() -> Option<(char, char)>;
71 fn production_separator() -> &'static str;
72 fn alternative_separator() -> char;
73 #[must_use]
76 fn production_start_char() -> Option<char> {
77 None
78 }
79}
80
81fn nonterminal<F: Format>(input: &str) -> IResult<&str, Term> {
82 let _span = crate::tracing::span!(DEBUG, "nonterminal").entered();
83 let (input, nt) = match F::nonterminal_delimiter() {
84 Some((start, end)) => delimited(
85 complete::char(start),
86 take_till(|c: char| c == end),
87 complete::char(end),
88 )
89 .parse(input)?,
90 None => {
91 satisfy(|c: char| c.is_alphabetic()).parse(input)?;
92 take_till(|c: char| c.is_whitespace() || c == ')' || c == ']').parse(input)?
93 }
94 };
95
96 let (input, _) = whitespace_plus_comments(input).unwrap();
97
98 Ok((input, Term::Nonterminal(nt.to_string())))
99}
100
101fn prod_lhs<F: Format>(input: &str) -> IResult<&str, Term> {
102 let _span = crate::tracing::span!(DEBUG, "prod_lhs").entered();
103 let (input, nt) = nonterminal::<F>(input)?;
104
105 let (input, _) = tag(F::production_separator()).parse(input)?;
106 let (input, _) = opt(complete::char(F::alternative_separator())).parse(input)?;
108 let (input, _) = whitespace_plus_comments(input).unwrap();
109
110 Ok((input, nt))
111}
112
113fn prod_rhs<F: Format>(input: &str) -> IResult<&str, Vec<Expression>> {
114 let _span = crate::tracing::span!(DEBUG, "prod_rhs").entered();
115 xt_list_with_separator(expression::<F>, expression_next::<F>).parse(input)
116}
117
118pub fn terminal(input: &str) -> IResult<&str, Term> {
119 let _span = crate::tracing::span!(DEBUG, "terminal").entered();
120 let (input, t) = alt((
121 delimited(complete::char('"'), take_until("\""), complete::char('"')),
122 delimited(complete::char('\''), take_until("'"), complete::char('\'')),
123 ))
124 .parse(input)?;
125
126 let (input, _) = whitespace_plus_comments(input).unwrap();
127
128 Ok((input, Term::Terminal(t.to_string())))
129}
130
131#[cfg_attr(test, mutants::skip)]
133pub fn whitespace_plus_comments(mut input: &str) -> IResult<&str, char> {
134 let _span = crate::tracing::span!(DEBUG, "whitespace_plus_comments").entered();
135 loop {
136 let rest = input.trim_start_matches(|c: char| c.is_whitespace());
137 if rest.len() == input.len() {
138 if let Some(after_semicolon) = rest.strip_prefix(';') {
139 if let Some(pos) = after_semicolon.find(['\r', '\n']) {
140 input = &after_semicolon[pos..];
141 } else {
142 return Ok(("", '\0'));
143 }
144 } else {
145 return Ok((input, '\0'));
146 }
147 } else {
148 input = rest;
149 }
150 }
151}
152
153pub fn is_format_standard_bnf(input: &str) -> bool {
154 let (input, _) = whitespace_plus_comments(input).unwrap();
155 complete::char::<&str, nom::error::Error<&str>>('<')
156 .parse(input)
157 .is_ok()
158}
159
160pub fn term<F: Format>(input: &str) -> IResult<&str, Term> {
161 let _span = crate::tracing::span!(DEBUG, "term").entered();
162 alt((terminal, nonterminal::<F>)).parse(input)
163}
164
165pub fn expression_next<F: Format>(input: &str) -> IResult<&str, &str> {
166 let _span = crate::tracing::span!(DEBUG, "expression_next").entered();
167 let (input, _) = complete::char(F::alternative_separator()).parse(input)?;
168 let (input, _) = whitespace_plus_comments(input).unwrap();
169
170 Ok((input, ""))
171}
172
173pub fn expression<F: Format>(input: &str) -> IResult<&str, Expression> {
174 let _span = crate::tracing::span!(DEBUG, "expression").entered();
175 let (input, terms) =
176 many1(terminated(term::<F>, not(tag(F::production_separator())))).parse(input)?;
177
178 Ok((input, Expression::from_parts(terms)))
179}
180
181pub fn production<F: Format>(input: &str) -> IResult<&str, Production> {
182 let _span = crate::tracing::span!(DEBUG, "production").entered();
183 let (input, lhs) = prod_lhs::<F>(input)?;
184 let (input, rhs) = prod_rhs::<F>(input)?;
185 let (input, _) = match F::production_start_char() {
186 Some(start_char) => alt((
187 recognize(peek(eof)),
188 recognize(peek(preceded(
189 whitespace_plus_comments,
190 complete::char(start_char),
191 ))),
192 ))
193 .parse(input)?,
194 None => alt((recognize(peek(eof)), recognize(peek(prod_lhs::<F>)))).parse(input)?,
195 };
196
197 Ok((input, Production::from_parts(lhs, rhs)))
198}
199
200#[allow(dead_code)] pub fn grammar<F: Format>(input: &str) -> IResult<&str, Grammar> {
205 let (input, parsed) = parsed_grammar::<F>(input)?;
206 Ok((input, normalize_parsed_grammar(parsed)))
207}
208
209pub(crate) fn grammar_has_extended_syntax(input: &str) -> bool {
212 if !input.contains('(') && !input.contains('[') {
213 return false;
214 }
215 let mut in_double = false;
216 let mut in_single = false;
217 for c in input.chars() {
218 if in_double {
219 if c == '"' {
220 in_double = false;
221 }
222 continue;
223 }
224 if in_single {
225 if c == '\'' {
226 in_single = false;
227 }
228 continue;
229 }
230 match c {
231 '"' => in_double = true,
232 '\'' => in_single = true,
233 '(' | '[' => return true,
234 _ => {}
235 }
236 }
237 false
238}
239
240fn plain_grammar<F: Format>(input: &str) -> IResult<&str, Grammar> {
242 let _span = crate::tracing::span!(DEBUG, "plain_grammar").entered();
243 let (input, _) = whitespace_plus_comments(input)?;
244 let (input, first) = production::<F>(input)?;
245 let (input, rest) = many1(preceded(whitespace_plus_comments, production::<F>)).parse(input)?;
246 let mut prods = vec![first];
247 prods.extend(rest);
248 Ok((input, Grammar::from_parts(prods)))
249}
250
251#[allow(dead_code)] pub fn grammar_complete<F: Format>(input: &str) -> IResult<&str, Grammar> {
254 let _span = crate::tracing::span!(DEBUG, "grammar_complete").entered();
255 if !grammar_has_extended_syntax(input)
256 && let Ok((input, g)) = all_consuming(plain_grammar::<F>).parse(input)
257 {
258 return Ok((input, g));
259 }
260 let (input, parsed) = parsed_grammar_complete::<F>(input)?;
261 Ok((input, normalize_parsed_grammar(parsed)))
262}
263
264fn parsed_nonterminal<F: Format>(input: &str) -> IResult<&str, ParsedTerm> {
267 let (input, nt) = match F::nonterminal_delimiter() {
268 Some((start, end)) => delimited(
269 complete::char(start),
270 take_till(|c: char| c == end),
271 complete::char(end),
272 )
273 .parse(input)?,
274 None => {
275 satisfy(|c: char| c.is_alphabetic()).parse(input)?;
276 take_till(|c: char| c.is_whitespace() || c == ')' || c == ']').parse(input)?
277 }
278 };
279
280 let (input, _) = whitespace_plus_comments(input).unwrap();
281
282 Ok((input, ParsedTerm::Simple(Term::Nonterminal(nt.to_string()))))
283}
284
285fn parsed_terminal(input: &str) -> IResult<&str, ParsedTerm> {
286 let (input, t) = alt((
287 delimited(complete::char('"'), take_until("\""), complete::char('"')),
288 delimited(complete::char('\''), take_until("'"), complete::char('\'')),
289 ))
290 .parse(input)?;
291
292 let (input, _) = whitespace_plus_comments(input).unwrap();
293
294 Ok((input, ParsedTerm::Simple(Term::Terminal(t.to_string()))))
295}
296
297fn parsed_expression_next<F: Format>(input: &str) -> IResult<&str, &str> {
298 let (input, _) = complete::char(F::alternative_separator()).parse(input)?;
299 let (input, _) = whitespace_plus_comments(input).unwrap();
300
301 Ok((input, ""))
302}
303
304fn parsed_anonymous_group<F: Format>(input: &str) -> IResult<&str, ParsedTerm> {
305 let (input, rhs) = delimited(
306 complete::char('('),
307 xt_list_with_separator(parsed_expression::<F>, parsed_expression_next::<F>),
308 complete::char(')'),
309 )
310 .parse(input)?;
311
312 let (input, _) = whitespace_plus_comments(input).unwrap();
313
314 Ok((input, ParsedTerm::Group(rhs)))
315}
316
317fn parsed_optional_group<F: Format>(input: &str) -> IResult<&str, ParsedTerm> {
318 let (input, rhs) = delimited(
319 complete::char('['),
320 xt_list_with_separator(parsed_expression::<F>, parsed_expression_next::<F>),
321 complete::char(']'),
322 )
323 .parse(input)?;
324
325 let (input, _) = whitespace_plus_comments(input).unwrap();
326
327 Ok((input, ParsedTerm::Optional(rhs)))
328}
329
330fn parsed_term<F: Format>(input: &str) -> IResult<&str, ParsedTerm> {
331 alt((
332 parsed_terminal,
333 parsed_nonterminal::<F>,
334 parsed_anonymous_group::<F>,
335 parsed_optional_group::<F>,
336 ))
337 .parse(input)
338}
339
340fn parsed_expression<F: Format>(input: &str) -> IResult<&str, ParsedExpression> {
341 let (input, terms) = many1(terminated(
342 parsed_term::<F>,
343 not(tag(F::production_separator())),
344 ))
345 .parse(input)?;
346
347 Ok((input, ParsedExpression::Terms(terms)))
348}
349
350fn parsed_prod_lhs<F: Format>(input: &str) -> IResult<&str, String> {
351 let (input, nt) = match F::nonterminal_delimiter() {
352 Some((start, end)) => delimited(
353 complete::char(start),
354 take_till(|c: char| c == end),
355 complete::char(end),
356 )
357 .parse(input)?,
358 None => {
359 satisfy(|c: char| c.is_alphabetic()).parse(input)?;
360 take_till(|c: char| c.is_whitespace() || c == ')' || c == ']').parse(input)?
361 }
362 };
363
364 let (input, _) = whitespace_plus_comments(input).unwrap();
365 let (input, _) = tag(F::production_separator()).parse(input)?;
366 let (input, _) = opt(complete::char(F::alternative_separator())).parse(input)?;
368 let (input, _) = whitespace_plus_comments(input).unwrap();
369
370 Ok((input, nt.to_string()))
371}
372
373fn parsed_prod_rhs<F: Format>(input: &str) -> IResult<&str, Vec<ParsedExpression>> {
374 xt_list_with_separator(parsed_expression::<F>, parsed_expression_next::<F>).parse(input)
375}
376
377fn parsed_production<F: Format>(input: &str) -> IResult<&str, ParsedProduction> {
378 let (input, lhs) = parsed_prod_lhs::<F>(input)?;
379 let (input, rhs) = parsed_prod_rhs::<F>(input)?;
380 let (input, _) =
381 alt((recognize(peek(eof)), recognize(peek(parsed_prod_lhs::<F>)))).parse(input)?;
382
383 Ok((input, ParsedProduction::Complex { lhs, rhs }))
384}
385
386fn parsed_grammar<F: Format>(input: &str) -> IResult<&str, ParsedGrammar> {
387 let (input, _) = whitespace_plus_comments(input)?;
388 let (input, prods) =
389 many1(preceded(whitespace_plus_comments, parsed_production::<F>)).parse(input)?;
390 Ok((input, ParsedGrammar::new(prods)))
391}
392
393fn parsed_grammar_complete<F: Format>(input: &str) -> IResult<&str, ParsedGrammar> {
394 all_consuming(parsed_grammar::<F>).parse(input)
395}
396
397fn normalize_parsed_grammar(parsed: ParsedGrammar) -> Grammar {
402 let mut used_names = crate::HashSet::new();
403 for prod in &parsed.productions {
404 let ParsedProduction::Complex { lhs, .. } = prod;
405 used_names.insert(lhs.clone());
406 }
407
408 let mut out_prods = Vec::new();
409 let mut anon_prods = Vec::new();
410
411 fn fresh_anon_name(used: &mut crate::HashSet<String>, counter: &mut usize) -> String {
413 loop {
414 let candidate = format!("__anon{}", counter);
415 *counter += 1;
416 if !used.contains(&candidate) {
417 used.insert(candidate.clone());
418 return candidate;
419 }
420 }
421 }
422
423 fn lower_expression(
424 expr: ParsedExpression,
425 used: &mut crate::HashSet<String>,
426 counter: &mut usize,
427 anon_prods: &mut Vec<Production>,
428 ) -> Expression {
429 let ParsedExpression::Terms(terms) = expr;
430 let terms: Vec<Term> = terms
431 .into_iter()
432 .map(|t| lower_term(t, used, counter, anon_prods))
433 .collect();
434 Expression::from_parts(terms)
435 }
436
437 fn lower_term(
438 term: ParsedTerm,
439 used: &mut crate::HashSet<String>,
440 counter: &mut usize,
441 anon_prods: &mut Vec<Production>,
442 ) -> Term {
443 match term {
444 ParsedTerm::Simple(t) => t,
445 ParsedTerm::Group(exprs) => {
446 let name = fresh_anon_name(used, counter);
447 let mut lowered_rhs: Vec<Expression> = exprs
448 .into_iter()
449 .map(|e| lower_expression(e, used, counter, anon_prods))
450 .collect();
451 if lowered_rhs.is_empty() {
453 lowered_rhs.push(Expression::from_parts(vec![Term::Terminal(String::new())]));
454 }
455 anon_prods.push(Production::from_parts(
456 Term::Nonterminal(name.clone()),
457 lowered_rhs,
458 ));
459 Term::Nonterminal(name)
460 }
461 ParsedTerm::Optional(exprs) => {
462 let name = fresh_anon_name(used, counter);
463 let mut lowered_rhs: Vec<Expression> = exprs
464 .into_iter()
465 .map(|e| lower_expression(e, used, counter, anon_prods))
466 .collect();
467 lowered_rhs.push(Expression::from_parts(vec![Term::Terminal(String::new())]));
469 anon_prods.push(Production::from_parts(
470 Term::Nonterminal(name.clone()),
471 lowered_rhs,
472 ));
473 Term::Nonterminal(name)
474 }
475 }
476 }
477
478 let mut counter = 0usize;
479
480 for parsed_prod in parsed.productions {
481 let ParsedProduction::Complex { lhs, rhs } = parsed_prod;
482 let lhs_term = Term::Nonterminal(lhs);
483 let rhs_exprs: Vec<Expression> = rhs
484 .into_iter()
485 .map(|e| lower_expression(e, &mut used_names, &mut counter, &mut anon_prods))
486 .collect();
487 out_prods.push(Production::from_parts(lhs_term, rhs_exprs));
488 }
489
490 out_prods.extend(anon_prods);
491 Grammar::from_parts(out_prods)
492}
493
494#[cfg(test)]
495#[allow(deprecated)] pub mod tests {
497 use super::*;
498
499 #[test]
500 fn whitespace_plus_comments_skips_comment_then_rest() {
501 let input = " ; comment\n rest";
502 let (remaining, _) = whitespace_plus_comments(input).unwrap();
503 assert_eq!(remaining, "rest");
504 }
505
506 #[test]
507 fn whitespace_plus_comments_comment_to_eof() {
508 let input = " ; comment";
509 let (remaining, _) = whitespace_plus_comments(input).unwrap();
510 assert_eq!(remaining, "");
511 }
512
513 #[test]
514 fn whitespace_plus_comments_skips_only_whitespace_without_semicolon() {
515 let input = " x";
516 let (remaining, _) = whitespace_plus_comments(input).unwrap();
517 assert!(remaining.starts_with('x'));
518 }
519
520 #[test]
521 fn terminal_match() {
522 let input = "\"hello world\"";
523 let expected = Term::Terminal("hello world".to_string());
524
525 let (_, actual) = terminal(input).unwrap();
526 assert_eq!(expected, actual);
527 }
528
529 #[test]
530 fn use_anon_nonterminal() {
531 let grammar = "s = ('a' / 'b') 'c'";
532 let grammar = grammar.parse::<Grammar>().unwrap();
533 let inputs = vec!["ac", "bc"];
534 for input in inputs {
535 assert!(grammar.parse_input(input).next().is_some());
536 }
537 }
538
539 #[test]
540 fn parse_optional_anon_nonterminal() {
541 let input = "s = 'c' ['a' / 'b']";
542 let expected = "s = 'c' ('a' / 'b' / '')";
543 let input = input.parse::<Grammar>().unwrap();
544 let twin = expected.parse::<Grammar>().unwrap();
545 assert_eq!(input, twin)
546 }
547 #[test]
548 fn parse_incremental_alternatives() {
550 let grammar = "s = a / a s
551 a = 'b'
552 a =/ 'c'";
553 assert!(grammar.parse::<Grammar>().is_ok());
554 }
555 #[test]
556 fn use_incremental_alternatives() {
557 let input = "s = a / (a s)
558 a = 'b'
559 a =/ 'c'";
560 let grammar = input.parse::<Grammar>().unwrap();
561 grammar
562 .parse_input("bcbccbbcbcbcbbbbbbbbbbbbccc")
563 .next()
564 .unwrap();
565 }
566
567 #[test]
568 fn empty_group_and_empty_optional_fail_to_parse() {
569 let empty_group: Result<Grammar, _> = "<s> ::= ()".parse();
571 assert!(empty_group.is_err(), "empty group () should fail to parse");
572 let empty_optional: Result<Grammar, _> = "<s> ::= []".parse();
573 assert!(
574 empty_optional.is_err(),
575 "empty optional [] should fail to parse"
576 );
577 }
578
579 #[test]
580 fn nested_groups_parse_and_parse_input() {
581 let grammar: Grammar = "
582 <s> ::= ((<a> | <b>) | <c>)
583 <a> ::= 'a'
584 <b> ::= 'b'
585 <c> ::= 'c'"
586 .parse()
587 .unwrap();
588 let parser = grammar.build_parser().unwrap();
589 for input in ["a", "b", "c"] {
590 assert!(
591 parser.parse_input(input).next().is_some(),
592 "nested group should parse '{input}'"
593 );
594 }
595 }
596
597 #[test]
598 fn round_trip_formatting_uses_anon() {
599 let grammar: Grammar = "
600 <s> ::= (<a> | <b>) [<c>]
601 <a> ::= 'a'
602 <b> ::= 'b'
603 <c> ::= 'c'"
604 .parse()
605 .unwrap();
606 let formatted = format!("{}", grammar);
607 assert!(
608 formatted.contains("__anon"),
609 "formatted grammar should use __anon* nonterminals, got: {formatted}"
610 );
611 let reparsed: Grammar = formatted.parse().unwrap();
612 assert_eq!(
613 grammar, reparsed,
614 "re-parsing formatted grammar should yield equal grammar"
615 );
616 }
617
618 #[test]
619 fn abnf_groups_and_optionals_parse_and_parse_input() {
620 let grammar: Grammar = "
621 s = ('a' / 'b') ['c']
622 a = 'a'
623 b = 'b'
624 c = 'c'"
625 .parse()
626 .unwrap();
627 let parser = grammar.build_parser().unwrap();
628 assert!(parser.parse_input("a").next().is_some());
629 assert!(parser.parse_input("ac").next().is_some());
630 assert!(parser.parse_input("b").next().is_some());
631 assert!(parser.parse_input("bc").next().is_some());
632 assert!(parser.parse_input("").next().is_none());
633 }
634
635 #[test]
636 fn single_element_group_and_optional() {
637 let grammar: Grammar = "
638 <s> ::= (<a>) [<b>]
639 <a> ::= 'a'
640 <b> ::= 'b'"
641 .parse()
642 .unwrap();
643 let parser = grammar.build_parser().unwrap();
644 assert!(
645 parser.parse_input("a").next().is_some(),
646 "single-element group (<a>) and optional omitted"
647 );
648 assert!(
649 parser.parse_input("ab").next().is_some(),
650 "single-element optional [<b>] present"
651 );
652 assert!(
653 parser.parse_input("").next().is_none(),
654 "start requires at least <a>"
655 );
656 }
657}