1use std::collections::HashMap;
17
18use super::error::{GrammarError, GrammarResult};
19
20#[derive(Debug, Clone, PartialEq, Eq)]
22pub struct CharRange {
23 pub lo: u8,
25 pub hi: u8,
27}
28
29impl CharRange {
30 pub fn single(b: u8) -> Self {
32 Self { lo: b, hi: b }
33 }
34
35 pub fn range(lo: u8, hi: u8) -> Self {
37 Self { lo, hi }
38 }
39
40 pub fn contains(&self, b: u8) -> bool {
42 b >= self.lo && b <= self.hi
43 }
44}
45
46#[derive(Debug, Clone)]
48pub enum GrammarNode {
49 Literal(Vec<u8>),
51 CharClass {
54 ranges: Vec<CharRange>,
55 negated: bool,
56 },
57 RuleRef(String),
59 Sequence(Vec<GrammarNode>),
61 Alternation(Vec<GrammarNode>),
63 Repeat {
65 node: Box<GrammarNode>,
66 min: usize,
67 max: Option<usize>,
68 },
69}
70
71#[derive(Debug, Clone)]
73pub struct Grammar {
74 pub rules: HashMap<String, GrammarNode>,
76 pub root: String,
78 pub source: String,
80}
81
82impl Grammar {
83 pub fn parse(input: &str) -> GrammarResult<Self> {
85 let mut parser = GbnfParser::new(input);
86 let mut grammar = parser.parse_grammar()?;
87 grammar.source = input.to_string();
88 Ok(grammar)
89 }
90
91 pub fn root_node(&self) -> GrammarResult<&GrammarNode> {
93 self.rules
94 .get(&self.root)
95 .ok_or_else(|| GrammarError::UnknownRule {
96 rule: self.root.clone(),
97 })
98 }
99}
100
101struct GbnfParser<'a> {
105 input: &'a [u8],
106 pos: usize,
107}
108
109impl<'a> GbnfParser<'a> {
110 fn new(input: &'a str) -> Self {
111 Self {
112 input: input.as_bytes(),
113 pos: 0,
114 }
115 }
116
117 fn parse_error(&self, msg: impl Into<String>) -> GrammarError {
120 GrammarError::ParseError {
121 pos: self.pos,
122 msg: msg.into(),
123 }
124 }
125
126 fn peek(&self) -> Option<u8> {
129 self.input.get(self.pos).copied()
130 }
131
132 fn peek2(&self) -> Option<u8> {
133 self.input.get(self.pos + 1).copied()
134 }
135
136 fn advance(&mut self) {
137 self.pos += 1;
138 }
139
140 fn consume(&mut self) -> Option<u8> {
141 let b = self.peek()?;
142 self.advance();
143 Some(b)
144 }
145
146 fn expect(&mut self, expected: u8) -> GrammarResult<()> {
147 match self.consume() {
148 Some(b) if b == expected => Ok(()),
149 Some(b) => Err(self.parse_error(format!(
150 "expected '{}' but got '{}'",
151 expected as char, b as char
152 ))),
153 None => Err(self.parse_error(format!(
154 "expected '{}' but reached end of input",
155 expected as char
156 ))),
157 }
158 }
159
160 fn is_ident_start(b: u8) -> bool {
161 b.is_ascii_alphabetic() || b == b'_'
162 }
163
164 fn is_ident_cont(b: u8) -> bool {
165 b.is_ascii_alphanumeric() || b == b'_' || b == b'-'
166 }
167
168 fn skip_inline_ws(&mut self) {
172 while matches!(self.peek(), Some(b' ') | Some(b'\t')) {
173 self.advance();
174 }
175 }
176
177 fn skip_ws_and_comments(&mut self) {
179 loop {
180 match self.peek() {
181 Some(b' ') | Some(b'\t') | Some(b'\r') | Some(b'\n') => {
182 self.advance();
183 }
184 Some(b'#') => {
185 while !matches!(self.peek(), None | Some(b'\n')) {
186 self.advance();
187 }
188 }
189 _ => break,
190 }
191 }
192 }
193
194 fn parse_grammar(&mut self) -> GrammarResult<Grammar> {
197 let mut rules: HashMap<String, GrammarNode> = HashMap::new();
198 let mut first_rule: Option<String> = None;
199
200 self.skip_ws_and_comments();
201
202 while self.pos < self.input.len() {
203 let name = self.parse_ident()?;
204 self.skip_inline_ws();
205
206 if self.pos + 3 > self.input.len() || &self.input[self.pos..self.pos + 3] != b"::=" {
208 return Err(self.parse_error("expected '::=' after rule name"));
209 }
210 self.pos += 3;
211 self.skip_inline_ws();
212
213 let node = self.parse_alternation()?;
214
215 if first_rule.is_none() {
216 first_rule = Some(name.clone());
217 }
218 rules.insert(name, node);
219
220 self.skip_ws_and_comments();
221 }
222
223 if rules.is_empty() {
224 return Err(GrammarError::ParseError {
225 pos: 0,
226 msg: "grammar has no rules".to_string(),
227 });
228 }
229
230 let root = if rules.contains_key("root") {
232 "root".to_string()
233 } else {
234 first_rule.unwrap_or_default()
235 };
236
237 Ok(Grammar {
238 rules,
239 root,
240 source: String::new(), })
242 }
243
244 fn parse_ident(&mut self) -> GrammarResult<String> {
247 let start = self.pos;
248 match self.peek() {
249 Some(b) if Self::is_ident_start(b) => {
250 self.advance();
251 }
252 _ => return Err(self.parse_error("expected identifier")),
253 }
254 while matches!(self.peek(), Some(b) if Self::is_ident_cont(b)) {
255 self.advance();
256 }
257 let slice = &self.input[start..self.pos];
258 Ok(String::from_utf8_lossy(slice).into_owned())
259 }
260
261 fn parse_alternation(&mut self) -> GrammarResult<GrammarNode> {
264 let first = self.parse_sequence()?;
265 self.skip_inline_ws();
266
267 if !matches!(self.peek(), Some(b'|')) {
268 return Ok(first);
269 }
270
271 let mut alternatives = vec![first];
272 while matches!(self.peek(), Some(b'|')) {
273 self.advance(); self.skip_inline_ws();
275 alternatives.push(self.parse_sequence()?);
276 self.skip_inline_ws();
277 }
278
279 Ok(GrammarNode::Alternation(alternatives))
280 }
281
282 fn parse_sequence(&mut self) -> GrammarResult<GrammarNode> {
285 let mut items: Vec<GrammarNode> = Vec::new();
286
287 loop {
288 self.skip_inline_ws();
289 match self.peek() {
291 None | Some(b'\n') | Some(b'\r') | Some(b'|') | Some(b')') => break,
292 Some(b'#') => break,
294 _ => {}
295 }
296 let item = self.parse_item()?;
297 items.push(item);
298 }
299
300 match items.len() {
301 0 => Ok(GrammarNode::Literal(vec![])),
302 1 => Ok(items.remove(0)),
303 _ => Ok(GrammarNode::Sequence(items)),
304 }
305 }
306
307 fn parse_item(&mut self) -> GrammarResult<GrammarNode> {
310 let base = self.parse_atom()?;
311 self.parse_quantifier(base)
312 }
313
314 fn parse_quantifier(&mut self, base: GrammarNode) -> GrammarResult<GrammarNode> {
315 match self.peek() {
316 Some(b'*') => {
317 self.advance();
318 Ok(GrammarNode::Repeat {
319 node: Box::new(base),
320 min: 0,
321 max: None,
322 })
323 }
324 Some(b'+') => {
325 self.advance();
326 Ok(GrammarNode::Repeat {
327 node: Box::new(base),
328 min: 1,
329 max: None,
330 })
331 }
332 Some(b'?') => {
333 self.advance();
334 Ok(GrammarNode::Repeat {
335 node: Box::new(base),
336 min: 0,
337 max: Some(1),
338 })
339 }
340 _ => Ok(base),
341 }
342 }
343
344 fn parse_atom(&mut self) -> GrammarResult<GrammarNode> {
347 match self.peek() {
348 Some(b'"') => self.parse_string_literal(),
349 Some(b'[') => self.parse_char_class(),
350 Some(b'(') => {
351 self.advance(); self.skip_inline_ws();
353 let inner = self.parse_alternation()?;
354 self.skip_inline_ws();
355 self.expect(b')')?;
356 Ok(inner)
357 }
358 Some(b) if Self::is_ident_start(b) => {
359 let name = self.parse_ident()?;
360 Ok(GrammarNode::RuleRef(name))
361 }
362 Some(b) => {
363 Err(self.parse_error(format!("unexpected character '{}' in grammar", b as char)))
364 }
365 None => Err(self.parse_error("unexpected end of input")),
366 }
367 }
368
369 fn parse_string_literal(&mut self) -> GrammarResult<GrammarNode> {
372 self.expect(b'"')?;
373 let mut bytes: Vec<u8> = Vec::new();
374
375 loop {
376 match self.consume() {
377 None => return Err(self.parse_error("unterminated string literal")),
378 Some(b'"') => break,
379 Some(b'\\') => {
380 let escaped = self.parse_escape_sequence()?;
381 bytes.extend_from_slice(&escaped);
382 }
383 Some(b) => bytes.push(b),
384 }
385 }
386
387 Ok(GrammarNode::Literal(bytes))
388 }
389
390 fn parse_escape_sequence(&mut self) -> GrammarResult<Vec<u8>> {
392 match self.consume() {
393 Some(b'n') => Ok(vec![b'\n']),
394 Some(b'r') => Ok(vec![b'\r']),
395 Some(b't') => Ok(vec![b'\t']),
396 Some(b'\\') => Ok(vec![b'\\']),
397 Some(b'"') => Ok(vec![b'"']),
398 Some(b'\'') => Ok(vec![b'\'']),
399 Some(b'0') => Ok(vec![0u8]),
400 Some(b'x') => {
401 let hi = self.consume_hex_digit()?;
403 let lo = self.consume_hex_digit()?;
404 Ok(vec![(hi << 4) | lo])
405 }
406 Some(b'u') => {
407 self.expect(b'{')?;
409 let mut codepoint: u32 = 0;
410 let mut digits = 0usize;
411 while !matches!(self.peek(), Some(b'}')) {
412 let d = self.consume_hex_digit()?;
413 codepoint = (codepoint << 4) | (d as u32);
414 digits += 1;
415 if digits > 6 {
416 return Err(self.parse_error("\\u{...} codepoint too large"));
417 }
418 }
419 self.expect(b'}')?;
420 let ch = char::from_u32(codepoint)
421 .ok_or_else(|| self.parse_error("invalid Unicode codepoint"))?;
422 let mut buf = [0u8; 4];
423 Ok(ch.encode_utf8(&mut buf).as_bytes().to_vec())
424 }
425 Some(b) => Err(self.parse_error(format!("unknown escape sequence '\\{}'", b as char))),
426 None => Err(self.parse_error("truncated escape sequence")),
427 }
428 }
429
430 fn consume_hex_digit(&mut self) -> GrammarResult<u8> {
431 match self.consume() {
432 Some(b @ b'0'..=b'9') => Ok(b - b'0'),
433 Some(b @ b'a'..=b'f') => Ok(b - b'a' + 10),
434 Some(b @ b'A'..=b'F') => Ok(b - b'A' + 10),
435 Some(b) => Err(self.parse_error(format!("expected hex digit, got '{}'", b as char))),
436 None => Err(self.parse_error("expected hex digit, reached end of input")),
437 }
438 }
439
440 fn parse_char_class(&mut self) -> GrammarResult<GrammarNode> {
443 self.expect(b'[')?;
444 let negated = if matches!(self.peek(), Some(b'^')) {
445 self.advance();
446 true
447 } else {
448 false
449 };
450
451 let mut ranges: Vec<CharRange> = Vec::new();
452
453 loop {
454 match self.peek() {
455 None => return Err(self.parse_error("unterminated character class")),
456 Some(b']') => {
457 self.advance();
458 break;
459 }
460 Some(b'\\') => {
461 self.advance(); let b = self.parse_class_escape()?;
463 if matches!(self.peek(), Some(b'-')) && !matches!(self.peek2(), Some(b']')) {
465 self.advance(); let hi = self.parse_class_byte()?;
467 ranges.push(CharRange::range(b, hi));
468 } else {
469 ranges.push(CharRange::single(b));
470 }
471 }
472 Some(_) => {
473 let lo = self.parse_class_byte()?;
474 if matches!(self.peek(), Some(b'-')) && !matches!(self.peek2(), Some(b']')) {
475 self.advance(); let hi = self.parse_class_byte()?;
477 ranges.push(CharRange::range(lo, hi));
478 } else {
479 ranges.push(CharRange::single(lo));
480 }
481 }
482 }
483 }
484
485 Ok(GrammarNode::CharClass { ranges, negated })
486 }
487
488 fn parse_class_byte(&mut self) -> GrammarResult<u8> {
490 match self.peek() {
491 Some(b'\\') => {
492 self.advance();
493 self.parse_class_escape()
494 }
495 Some(b) => {
496 self.advance();
497 Ok(b)
498 }
499 None => Err(self.parse_error("unexpected end inside character class")),
500 }
501 }
502
503 fn parse_class_escape(&mut self) -> GrammarResult<u8> {
505 match self.consume() {
506 Some(b'n') => Ok(b'\n'),
507 Some(b'r') => Ok(b'\r'),
508 Some(b't') => Ok(b'\t'),
509 Some(b'\\') => Ok(b'\\'),
510 Some(b']') => Ok(b']'),
511 Some(b'-') => Ok(b'-'),
512 Some(b'^') => Ok(b'^'),
513 Some(b'x') => {
514 let hi = self.consume_hex_digit()?;
515 let lo = self.consume_hex_digit()?;
516 Ok((hi << 4) | lo)
517 }
518 Some(b) => Err(self.parse_error(format!("unknown class escape '\\{}'", b as char))),
519 None => Err(self.parse_error("truncated class escape")),
520 }
521 }
522}
523
524#[cfg(test)]
527mod tests {
528 use super::*;
529
530 #[test]
531 fn test_parse_literal_alternation() {
532 let g = Grammar::parse(r#"root ::= "yes" | "no""#).unwrap();
533 assert_eq!(g.root, "root");
534 assert!(g.rules.contains_key("root"));
535 }
536
537 #[test]
538 fn test_parse_char_class_range() {
539 let g = Grammar::parse("root ::= [a-z]+").unwrap();
540 assert!(g.rules.contains_key("root"));
541 }
542
543 #[test]
544 fn test_parse_sequence() {
545 let g = Grammar::parse(r#"root ::= [a-z]+ ":" [0-9]+"#).unwrap();
546 assert!(!g.rules.is_empty());
547 }
548
549 #[test]
550 fn test_parse_multiple_rules() {
551 let grammar_str = "root ::= greeting\ngreeting ::= \"hello\"";
552 let g = Grammar::parse(grammar_str).unwrap();
553 assert_eq!(g.root, "root");
554 assert!(g.rules.contains_key("greeting"));
555 }
556
557 #[test]
558 fn test_parse_empty_grammar_fails() {
559 let result = Grammar::parse("");
560 assert!(result.is_err());
561 }
562
563 #[test]
564 fn test_parse_negated_char_class() {
565 let g = Grammar::parse("root ::= [^0-9]+").unwrap();
566 assert!(g.rules.contains_key("root"));
567 match g.rules.get("root").unwrap() {
568 GrammarNode::Repeat { node, .. } => match node.as_ref() {
569 GrammarNode::CharClass { negated, .. } => assert!(*negated),
570 other => panic!("expected CharClass, got {:?}", other),
571 },
572 other => panic!("expected Repeat, got {:?}", other),
573 }
574 }
575
576 #[test]
577 fn test_parse_grouping() {
578 let g = Grammar::parse(r#"root ::= ("a" | "b")+"#).unwrap();
579 assert!(g.rules.contains_key("root"));
580 }
581
582 #[test]
583 fn test_parse_json_simple() {
584 let grammar_str = r#"root ::= "{" ws "}"
586ws ::= [ \t\n]*"#;
587 let g = Grammar::parse(grammar_str).unwrap();
588 assert_eq!(g.root, "root");
589 }
590
591 #[test]
594 fn test_parse_escape_newline() {
595 let g = Grammar::parse(r#"root ::= "\n""#).expect("test: \\n escape should parse");
597 match g.rules.get("root").expect("test: root rule") {
598 GrammarNode::Literal(bytes) => assert_eq!(bytes, b"\n"),
599 other => panic!("expected Literal, got {:?}", other),
600 }
601 }
602
603 #[test]
604 fn test_parse_escape_tab() {
605 let g = Grammar::parse(r#"root ::= "\t""#).expect("test: \\t escape should parse");
606 match g.rules.get("root").expect("test: root rule") {
607 GrammarNode::Literal(bytes) => assert_eq!(bytes, b"\t"),
608 other => panic!("expected Literal, got {:?}", other),
609 }
610 }
611
612 #[test]
613 fn test_parse_escape_carriage_return() {
614 let g = Grammar::parse(r#"root ::= "\r""#).expect("test: \\r escape should parse");
615 match g.rules.get("root").expect("test: root rule") {
616 GrammarNode::Literal(bytes) => assert_eq!(bytes, b"\r"),
617 other => panic!("expected Literal, got {:?}", other),
618 }
619 }
620
621 #[test]
622 fn test_parse_escape_backslash() {
623 let g = Grammar::parse(r#"root ::= "\\""#).expect("test: \\\\ escape should parse");
624 match g.rules.get("root").expect("test: root rule") {
625 GrammarNode::Literal(bytes) => assert_eq!(bytes, b"\\"),
626 other => panic!("expected Literal, got {:?}", other),
627 }
628 }
629
630 #[test]
631 fn test_parse_escape_quote() {
632 let g = Grammar::parse(r#"root ::= "\"hi\"""#).expect("test: escaped quote should parse");
633 match g.rules.get("root").expect("test: root rule") {
634 GrammarNode::Literal(bytes) => {
635 assert_eq!(bytes[0], b'"');
636 assert_eq!(*bytes.last().expect("test: last byte"), b'"');
637 }
638 other => panic!("expected Literal, got {:?}", other),
639 }
640 }
641
642 #[test]
643 fn test_parse_escape_null() {
644 let g = Grammar::parse(r#"root ::= "\0""#).expect("test: \\0 escape should parse");
645 match g.rules.get("root").expect("test: root rule") {
646 GrammarNode::Literal(bytes) => assert_eq!(bytes, &[0u8]),
647 other => panic!("expected Literal, got {:?}", other),
648 }
649 }
650
651 #[test]
652 fn test_parse_escape_hex() {
653 let g = Grammar::parse(r#"root ::= "\x41""#).expect("test: \\x41 should parse");
655 match g.rules.get("root").expect("test: root rule") {
656 GrammarNode::Literal(bytes) => assert_eq!(bytes, &[0x41u8]),
657 other => panic!("expected Literal, got {:?}", other),
658 }
659 }
660
661 #[test]
662 fn test_parse_escape_unicode() {
663 let g = Grammar::parse(r#"root ::= "\u{0041}""#).expect("test: \\u{0041} should parse");
665 match g.rules.get("root").expect("test: root rule") {
666 GrammarNode::Literal(bytes) => assert_eq!(bytes, b"A"),
667 other => panic!("expected Literal, got {:?}", other),
668 }
669 }
670
671 #[test]
672 fn test_parse_escape_unicode_multibyte() {
673 let g = Grammar::parse(r#"root ::= "\u{263A}""#).expect("test: \\u{263A} should parse");
675 match g.rules.get("root").expect("test: root rule") {
676 GrammarNode::Literal(bytes) => {
677 assert_eq!(bytes, "☺".as_bytes());
678 }
679 other => panic!("expected Literal, got {:?}", other),
680 }
681 }
682
683 #[test]
686 fn test_parse_unterminated_string_errors() {
687 let result = Grammar::parse(r#"root ::= "abc"#);
688 assert!(result.is_err(), "unterminated string should be an error");
689 }
690
691 #[test]
692 fn test_parse_unknown_escape_errors() {
693 let result = Grammar::parse(r#"root ::= "\q""#);
694 assert!(result.is_err(), "unknown escape \\q should be an error");
695 }
696
697 #[test]
698 fn test_parse_truncated_escape_errors() {
699 let result = Grammar::parse("root ::= \"\\\"");
701 assert!(
704 result.is_err(),
705 "truncated escape at end of input should error"
706 );
707 }
708
709 #[test]
710 fn test_parse_hex_escape_invalid_digit_errors() {
711 let result = Grammar::parse(r#"root ::= "\xGG""#);
713 assert!(result.is_err(), "invalid hex digit in \\x should error");
714 }
715
716 #[test]
717 fn test_parse_unicode_escape_invalid_codepoint_errors() {
718 let grammar_str = "root ::= \"\\u{D800}\"";
722 let result = Grammar::parse(grammar_str);
723 assert!(
724 result.is_err(),
725 "surrogate codepoint \\u{{D800}} should error"
726 );
727 }
728
729 #[test]
730 fn test_parse_unicode_escape_too_many_digits_errors() {
731 let grammar_str = "root ::= \"\\u{1234567}\"";
733 let result = Grammar::parse(grammar_str);
734 assert!(result.is_err(), "\\u{{...}} with >6 digits should error");
735 }
736
737 #[test]
740 fn test_parse_char_class_escape_newline() {
741 let g = Grammar::parse(r"root ::= [\n]").expect("test: [\\n] should parse");
742 assert!(g.rules.contains_key("root"));
743 }
744
745 #[test]
746 fn test_parse_char_class_escape_tab() {
747 let g = Grammar::parse(r"root ::= [\t]").expect("test: [\\t] should parse");
748 assert!(g.rules.contains_key("root"));
749 }
750
751 #[test]
752 fn test_parse_char_class_escape_dash() {
753 let g = Grammar::parse(r"root ::= [\-]").expect("test: [\\-] should parse");
755 assert!(g.rules.contains_key("root"));
756 }
757
758 #[test]
759 fn test_parse_char_class_escape_caret() {
760 let g = Grammar::parse(r"root ::= [\^]").expect("test: [\\^] should parse");
762 assert!(g.rules.contains_key("root"));
763 }
764
765 #[test]
766 fn test_parse_char_class_escape_bracket() {
767 let g = Grammar::parse(r"root ::= [\]]").expect("test: [\\]] should parse");
769 assert!(g.rules.contains_key("root"));
770 }
771
772 #[test]
773 fn test_parse_char_class_hex_escape() {
774 let g = Grammar::parse(r"root ::= [\x41-\x5A]+").expect("test: [\\x41-\\x5A] should parse");
776 assert!(g.rules.contains_key("root"));
777 }
778
779 #[test]
780 fn test_parse_char_class_unknown_escape_errors() {
781 let result = Grammar::parse(r"root ::= [\q]");
782 assert!(
783 result.is_err(),
784 "unknown char class escape \\q should error"
785 );
786 }
787
788 #[test]
789 fn test_parse_unterminated_char_class_errors() {
790 let result = Grammar::parse("root ::= [a-z");
791 assert!(result.is_err(), "unterminated char class should error");
792 }
793
794 #[test]
797 fn test_parse_missing_assign_errors() {
798 let result = Grammar::parse("root = \"hello\"");
799 assert!(result.is_err(), "missing ::= should error");
800 }
801
802 #[test]
803 fn test_parse_unexpected_char_errors() {
804 let result = Grammar::parse("root ::= @");
806 assert!(
807 result.is_err(),
808 "unexpected char '@' in grammar atom should error"
809 );
810 }
811
812 #[test]
813 fn test_parse_unclosed_group_errors() {
814 let result = Grammar::parse(r#"root ::= ("abc""#);
815 assert!(result.is_err(), "unclosed group should error");
816 }
817
818 #[test]
819 fn test_parse_first_rule_becomes_root_when_no_root() {
820 let g = Grammar::parse("entry ::= \"hello\"").expect("test: single rule without root");
822 assert_eq!(g.root, "entry");
823 }
824
825 #[test]
826 fn test_parse_optional_quantifier() {
827 let g = Grammar::parse(r#"root ::= "a"?"#).expect("test: ? quantifier should parse");
829 match g.rules.get("root").expect("test: root rule") {
830 GrammarNode::Repeat { min, max, .. } => {
831 assert_eq!(*min, 0);
832 assert_eq!(*max, Some(1));
833 }
834 other => panic!("expected Repeat, got {:?}", other),
835 }
836 }
837
838 #[test]
839 fn test_parse_star_quantifier() {
840 let g = Grammar::parse(r#"root ::= "a"*"#).expect("test: * quantifier should parse");
841 match g.rules.get("root").expect("test: root rule") {
842 GrammarNode::Repeat { min, max, .. } => {
843 assert_eq!(*min, 0);
844 assert_eq!(*max, None);
845 }
846 other => panic!("expected Repeat, got {:?}", other),
847 }
848 }
849
850 #[test]
851 fn test_parse_plus_quantifier() {
852 let g = Grammar::parse(r#"root ::= "a"+"#).expect("test: + quantifier should parse");
853 match g.rules.get("root").expect("test: root rule") {
854 GrammarNode::Repeat { min, max, .. } => {
855 assert_eq!(*min, 1);
856 assert_eq!(*max, None);
857 }
858 other => panic!("expected Repeat, got {:?}", other),
859 }
860 }
861
862 #[test]
863 fn test_parse_comment_skipped() {
864 let grammar_str = "# This is a comment\nroot ::= \"hello\"";
866 let g = Grammar::parse(grammar_str).expect("test: comment should be ignored");
867 assert_eq!(g.root, "root");
868 }
869
870 #[test]
871 fn test_root_node_accessor_ok() {
872 let g = Grammar::parse(r#"root ::= "hi""#).expect("test: should parse");
873 g.root_node()
874 .expect("test: root_node() should succeed when root rule exists");
875 }
876
877 #[test]
878 fn test_root_node_accessor_missing_rule_errors() {
879 let g = Grammar {
881 rules: std::collections::HashMap::new(),
882 root: "missing".to_string(),
883 source: String::new(),
884 };
885 let result = g.root_node();
886 assert!(result.is_err(), "root_node() on missing rule should error");
887 match result {
888 Err(super::super::error::GrammarError::UnknownRule { rule }) => {
889 assert_eq!(rule, "missing");
890 }
891 other => panic!("expected UnknownRule, got {:?}", other),
892 }
893 }
894
895 #[test]
896 fn test_char_range_contains() {
897 let r = CharRange::range(b'a', b'z');
898 assert!(r.contains(b'a'));
899 assert!(r.contains(b'z'));
900 assert!(r.contains(b'm'));
901 assert!(!r.contains(b'A'));
902 assert!(!r.contains(b'0'));
903 }
904
905 #[test]
906 fn test_char_range_single() {
907 let r = CharRange::single(b'X');
908 assert!(r.contains(b'X'));
909 assert!(!r.contains(b'Y'));
910 assert!(!r.contains(b'W'));
911 }
912}