1use crate::pos::Pos;
4use crate::token::{Code, Token};
5
/// Parsing context carried alongside the input in [`State`].
///
/// NOTE(review): the variant names look like the YAML grammar's context
/// parameter `c` (block-out, block-in, flow-out, flow-in, block-key,
/// flow-key) — confirm against the grammar productions that consume it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Context {
    /// Block style, "out" variant.
    BlockOut,
    /// Block style, "in" variant.
    BlockIn,
    /// Flow style, "out" variant.
    FlowOut,
    /// Flow style, "in" variant.
    FlowIn,
    /// Key position in block style.
    BlockKey,
    /// Key position in flow style.
    FlowKey,
}
20
21#[derive(Debug, Clone)]
31pub struct State<'i> {
32 pub input: &'i str,
34 pub pos: Pos,
36 pub n: i32,
38 pub c: Context,
40}
41
42impl<'i> State<'i> {
43 #[must_use]
45 pub const fn new(input: &'i str) -> Self {
46 Self {
47 input,
48 pos: Pos::ORIGIN,
49 n: 0,
50 c: Context::BlockOut,
51 }
52 }
53
54 #[must_use]
56 pub const fn with_context(input: &'i str, n: i32, c: Context) -> Self {
57 Self {
58 input,
59 pos: Pos::ORIGIN,
60 n,
61 c,
62 }
63 }
64
65 #[must_use]
67 pub fn peek(&self) -> Option<char> {
68 self.input.chars().next()
69 }
70
71 #[must_use]
75 pub fn advance(self, ch: char) -> Self {
76 let byte_len = ch.len_utf8();
77 let new_input = &self.input[byte_len..];
78 let new_pos = if ch == '\n' {
79 Pos {
80 byte_offset: self.pos.byte_offset + byte_len,
81 char_offset: self.pos.char_offset + 1,
82 line: self.pos.line + 1,
83 column: 0,
84 }
85 } else {
86 Pos {
87 byte_offset: self.pos.byte_offset + byte_len,
88 char_offset: self.pos.char_offset + 1,
89 line: self.pos.line,
90 column: self.pos.column + 1,
91 }
92 };
93 Self {
94 input: new_input,
95 pos: new_pos,
96 n: self.n,
97 c: self.c,
98 }
99 }
100}
101
102#[derive(Debug)]
108pub enum Reply<'i> {
109 Success {
111 tokens: Vec<Token<'i>>,
112 state: State<'i>,
113 },
114 Failure,
117 Error(ParseError),
121}
122
123impl Reply<'_> {
124 const fn is_success(&self) -> bool {
125 matches!(self, Self::Success { .. })
126 }
127}
128
129#[derive(Debug, Clone)]
135pub struct ParseError {
136 pub pos: Pos,
137 pub label: &'static str,
138 pub message: String,
139}
140
141pub type Parser<'i> = Box<dyn Fn(State<'i>) -> Reply<'i> + 'i>;
152
153#[must_use]
162pub fn satisfy<'i, F>(predicate: F) -> Parser<'i>
163where
164 F: Fn(char) -> bool + 'i,
165{
166 Box::new(move |state: State<'i>| {
167 let Some(ch) = state.peek() else {
168 return Reply::Failure;
169 };
170 if !predicate(ch) {
171 return Reply::Failure;
172 }
173 let new_state = state.advance(ch);
174 Reply::Success {
175 tokens: Vec::new(),
176 state: new_state,
177 }
178 })
179}
180
181#[must_use]
183pub fn char_parser<'i>(expected: char) -> Parser<'i> {
184 satisfy(move |ch| ch == expected)
185}
186
187#[must_use]
189pub fn fail<'i>() -> Parser<'i> {
190 Box::new(|_state: State<'i>| Reply::Failure)
191}
192
193#[must_use]
203pub fn seq<'i>(a: Parser<'i>, b: Parser<'i>) -> Parser<'i> {
204 Box::new(move |state: State<'i>| match a(state) {
205 Reply::Failure => Reply::Failure,
206 Reply::Error(e) => Reply::Error(e),
207 Reply::Success {
208 tokens: mut tokens_a,
209 state: state_after_a,
210 } => match b(state_after_a) {
211 Reply::Failure => Reply::Failure,
212 Reply::Error(e) => Reply::Error(e),
213 Reply::Success {
214 tokens: tokens_b,
215 state: final_state,
216 } => {
217 tokens_a.extend(tokens_b);
218 Reply::Success {
219 tokens: tokens_a,
220 state: final_state,
221 }
222 }
223 },
224 })
225}
226
227#[must_use]
229pub fn alt<'i>(a: Parser<'i>, b: Parser<'i>) -> Parser<'i> {
230 Box::new(move |state: State<'i>| {
231 match a(state.clone()) {
233 Reply::Failure => b(state),
234 other @ (Reply::Success { .. } | Reply::Error(_)) => other,
235 }
236 })
237}
238
239#[must_use]
242pub fn many0<'i>(p: Parser<'i>) -> Parser<'i> {
243 Box::new(move |mut state: State<'i>| {
244 let mut all_tokens: Vec<Token<'i>> = Vec::new();
245 loop {
246 match p(state.clone()) {
247 Reply::Failure => {
248 return Reply::Success {
249 tokens: all_tokens,
250 state,
251 };
252 }
253 Reply::Error(e) => return Reply::Error(e),
254 Reply::Success { tokens, state: s } => {
255 all_tokens.extend(tokens);
256 state = s;
257 }
258 }
259 }
260 })
261}
262
263#[must_use]
265pub fn many1<'i>(p: Parser<'i>) -> Parser<'i> {
266 Box::new(move |state: State<'i>| match p(state) {
267 Reply::Failure => Reply::Failure,
268 Reply::Error(e) => Reply::Error(e),
269 Reply::Success {
270 tokens: mut first_tokens,
271 state: mut current_state,
272 } => loop {
273 match p(current_state.clone()) {
274 Reply::Failure => {
275 return Reply::Success {
276 tokens: first_tokens,
277 state: current_state,
278 };
279 }
280 Reply::Error(e) => return Reply::Error(e),
281 Reply::Success { tokens, state: s } => {
282 first_tokens.extend(tokens);
283 current_state = s;
284 }
285 }
286 },
287 })
288}
289
290#[must_use]
292pub fn opt<'i>(p: Parser<'i>) -> Parser<'i> {
293 Box::new(move |state: State<'i>| match p(state.clone()) {
294 Reply::Failure => Reply::Success {
295 tokens: Vec::new(),
296 state,
297 },
298 other @ (Reply::Success { .. } | Reply::Error(_)) => other,
299 })
300}
301
302#[must_use]
307pub fn exclude<'i>(p: Parser<'i>, q: Parser<'i>) -> Parser<'i> {
308 Box::new(move |state: State<'i>| {
309 if q(state.clone()).is_success() {
311 return Reply::Failure;
312 }
313 p(state)
314 })
315}
316
317#[must_use]
320pub fn lookahead<'i>(p: Parser<'i>) -> Parser<'i> {
321 Box::new(move |state: State<'i>| match p(state.clone()) {
322 Reply::Success { .. } => Reply::Success {
323 tokens: Vec::new(),
324 state,
325 },
326 Reply::Failure => Reply::Failure,
327 Reply::Error(e) => Reply::Error(e),
328 })
329}
330
331#[must_use]
334pub fn neg_lookahead<'i>(p: Parser<'i>) -> Parser<'i> {
335 Box::new(move |state: State<'i>| match p(state.clone()) {
336 Reply::Success { .. } => Reply::Failure,
337 Reply::Failure => Reply::Success {
338 tokens: Vec::new(),
339 state,
340 },
341 Reply::Error(e) => Reply::Error(e),
343 })
344}
345
346#[must_use]
349pub fn commit<'i>(label: &'static str, p: Parser<'i>) -> Parser<'i> {
350 Box::new(move |state: State<'i>| {
351 let pos = state.pos;
352 match p(state) {
353 Reply::Failure => Reply::Error(ParseError {
354 pos,
355 label,
356 message: format!("expected {label}"),
357 }),
358 other @ (Reply::Success { .. } | Reply::Error(_)) => other,
359 }
360 })
361}
362
363#[must_use]
367pub fn wrap_tokens<'i>(begin: Code, end: Code, p: Parser<'i>) -> Parser<'i> {
368 Box::new(move |state: State<'i>| {
369 let begin_pos = state.pos;
370 match p(state) {
371 Reply::Failure => Reply::Failure,
372 Reply::Error(e) => Reply::Error(e),
373 Reply::Success {
374 tokens: inner,
375 state: final_state,
376 } => {
377 let end_pos = final_state.pos;
378 let mut tokens = Vec::with_capacity(inner.len() + 2);
379 tokens.push(Token {
380 code: begin,
381 pos: begin_pos,
382 text: "",
383 });
384 tokens.extend(inner);
385 tokens.push(Token {
386 code: end,
387 pos: end_pos,
388 text: "",
389 });
390 Reply::Success {
391 tokens,
392 state: final_state,
393 }
394 }
395 }
396 })
397}
398
399#[must_use]
404pub fn token<'i>(code: Code, p: Parser<'i>) -> Parser<'i> {
405 Box::new(move |state: State<'i>| {
406 let start_pos = state.pos;
407 let start_input = state.input;
408 match p(state) {
409 Reply::Failure => Reply::Failure,
410 Reply::Error(e) => Reply::Error(e),
411 Reply::Success {
412 state: final_state, ..
413 } => {
414 let consumed_bytes = final_state.pos.byte_offset - start_pos.byte_offset;
415 let text = &start_input[..consumed_bytes];
416 Reply::Success {
417 tokens: vec![Token {
418 code,
419 pos: start_pos,
420 text,
421 }],
422 state: final_state,
423 }
424 }
425 }
426 })
427}
428
#[cfg(test)]
// Test code: direct indexing and panicking on unexpected replies is intended.
#[allow(clippy::indexing_slicing, clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;

    // ---------------------------------------------------------------
    // Helpers
    // ---------------------------------------------------------------

    /// State over `input` starting at the origin with default `n` and `c`.
    fn state(input: &str) -> State<'_> {
        State::new(input)
    }

    /// State over `input` that reports `pos` as its current position.
    fn state_at(input: &str, pos: Pos) -> State<'_> {
        State {
            input,
            pos,
            n: 0,
            c: Context::BlockOut,
        }
    }

    /// Unconsumed input of a successful reply; panics on anything else.
    fn remaining<'a>(reply: &'a Reply<'a>) -> &'a str {
        let Reply::Success { state: rest, .. } = reply else {
            panic!("expected success")
        };
        rest.input
    }

    /// Token codes of a successful reply; panics on anything else.
    fn tokens(reply: Reply<'_>) -> Vec<Code> {
        let Reply::Success {
            tokens: emitted, ..
        } = reply
        else {
            panic!("expected success")
        };
        emitted.into_iter().map(|tok| tok.code).collect()
    }

    fn is_failure(reply: &Reply<'_>) -> bool {
        matches!(reply, Reply::Failure)
    }

    fn is_error(reply: &Reply<'_>) -> bool {
        matches!(reply, Reply::Error(_))
    }

    // ---------------------------------------------------------------
    // seq
    // ---------------------------------------------------------------

    #[test]
    fn seq_matches_both_parsers_in_order() {
        let parser = seq(char_parser('a'), char_parser('b'));
        let outcome = parser(state("ab"));
        assert_eq!(remaining(&outcome), "");
    }

    #[test]
    fn seq_fails_when_first_parser_fails() {
        let parser = seq(char_parser('x'), char_parser('b'));
        let outcome = parser(state("ab"));
        assert!(is_failure(&outcome));
    }

    #[test]
    fn seq_fails_when_second_parser_fails() {
        let parser = seq(char_parser('a'), char_parser('x'));
        let outcome = parser(state("ab"));
        assert!(is_failure(&outcome));
    }

    #[test]
    fn seq_on_empty_input_fails_when_non_empty_expected() {
        let parser = seq(char_parser('a'), char_parser('b'));
        let outcome = parser(state(""));
        assert!(is_failure(&outcome));
    }

    // ---------------------------------------------------------------
    // alt
    // ---------------------------------------------------------------

    #[test]
    fn alt_matches_first_alternative() {
        let parser = alt(char_parser('a'), char_parser('b'));
        let outcome = parser(state("a"));
        assert_eq!(remaining(&outcome), "");
    }

    #[test]
    fn alt_falls_through_to_second_when_first_fails() {
        let parser = alt(char_parser('a'), char_parser('b'));
        let outcome = parser(state("b"));
        assert_eq!(remaining(&outcome), "");
    }

    #[test]
    fn alt_fails_when_both_alternatives_fail() {
        let parser = alt(char_parser('a'), char_parser('b'));
        let outcome = parser(state("c"));
        assert!(is_failure(&outcome));
    }

    #[test]
    fn alt_does_not_try_second_when_first_matches() {
        // The second branch yields a hard error, so reaching it would be
        // visible in the reply.
        let parser = alt(
            char_parser('a'),
            Box::new(|_s: State<'_>| {
                Reply::Error(ParseError {
                    pos: Pos::ORIGIN,
                    label: "should not be tried",
                    message: "alt tried second branch after first succeeded".into(),
                })
            }),
        );
        let outcome = parser(state("a"));
        assert!(matches!(outcome, Reply::Success { .. }));
    }

    // ---------------------------------------------------------------
    // many0
    // ---------------------------------------------------------------

    #[test]
    fn many0_matches_zero_occurrences() {
        let parser = many0(char_parser('a'));
        let outcome = parser(state("b"));
        assert_eq!(remaining(&outcome), "b");
    }

    #[test]
    fn many0_matches_multiple_occurrences() {
        let parser = many0(char_parser('a'));
        let outcome = parser(state("aaab"));
        assert_eq!(remaining(&outcome), "b");
    }

    #[test]
    fn many0_on_empty_input_succeeds_with_empty_result() {
        let parser = many0(char_parser('a'));
        let outcome = parser(state(""));
        assert_eq!(remaining(&outcome), "");
    }

    // ---------------------------------------------------------------
    // many1
    // ---------------------------------------------------------------

    #[test]
    fn many1_fails_when_no_occurrences() {
        let parser = many1(char_parser('a'));
        let outcome = parser(state("b"));
        assert!(is_failure(&outcome));
    }

    #[test]
    fn many1_matches_single_occurrence() {
        let parser = many1(char_parser('a'));
        let outcome = parser(state("ab"));
        assert_eq!(remaining(&outcome), "b");
    }

    #[test]
    fn many1_matches_multiple_occurrences() {
        let parser = many1(char_parser('a'));
        let outcome = parser(state("aaab"));
        assert_eq!(remaining(&outcome), "b");
    }

    // ---------------------------------------------------------------
    // opt
    // ---------------------------------------------------------------

    #[test]
    fn opt_returns_success_when_parser_matches() {
        let parser = opt(char_parser('a'));
        let outcome = parser(state("ab"));
        assert_eq!(remaining(&outcome), "b");
    }

    #[test]
    fn opt_returns_success_when_parser_does_not_match() {
        let parser = opt(char_parser('a'));
        let outcome = parser(state("b"));
        assert!(matches!(&outcome, Reply::Success { .. }));
        assert_eq!(remaining(&outcome), "b");
    }

    #[test]
    fn opt_always_succeeds_on_empty_input() {
        let parser = opt(char_parser('a'));
        let outcome = parser(state(""));
        assert!(matches!(&outcome, Reply::Success { .. }));
    }

    // ---------------------------------------------------------------
    // exclude
    // ---------------------------------------------------------------

    #[test]
    fn exclude_succeeds_when_p_matches_and_q_does_not() {
        let parser = exclude(char_parser('a'), char_parser('b'));
        let outcome = parser(state("a"));
        assert_eq!(remaining(&outcome), "");
    }

    #[test]
    fn exclude_fails_when_both_p_and_q_match() {
        let parser = exclude(char_parser('a'), char_parser('a'));
        let outcome = parser(state("a"));
        assert!(is_failure(&outcome));
    }

    #[test]
    fn exclude_fails_when_p_does_not_match() {
        let parser = exclude(char_parser('a'), char_parser('b'));
        let outcome = parser(state("b"));
        assert!(is_failure(&outcome));
    }

    // ---------------------------------------------------------------
    // lookahead / neg_lookahead
    // ---------------------------------------------------------------

    #[test]
    fn lookahead_succeeds_without_consuming_input() {
        let parser = lookahead(char_parser('a'));
        let outcome = parser(state("abc"));
        assert_eq!(remaining(&outcome), "abc");
    }

    #[test]
    fn lookahead_fails_when_parser_fails() {
        let parser = lookahead(char_parser('x'));
        let outcome = parser(state("abc"));
        assert!(is_failure(&outcome));
    }

    #[test]
    fn neg_lookahead_succeeds_when_parser_fails() {
        let parser = neg_lookahead(char_parser('x'));
        let outcome = parser(state("abc"));
        assert!(matches!(&outcome, Reply::Success { .. }));
        assert_eq!(remaining(&outcome), "abc");
    }

    #[test]
    fn neg_lookahead_fails_when_parser_succeeds() {
        let parser = neg_lookahead(char_parser('a'));
        let outcome = parser(state("abc"));
        assert!(is_failure(&outcome));
    }

    // ---------------------------------------------------------------
    // commit
    // ---------------------------------------------------------------

    #[test]
    fn commit_succeeds_and_inner_parser_output_is_preserved() {
        let parser = commit("char_a", char_parser('a'));
        let outcome = parser(state("a"));
        assert_eq!(remaining(&outcome), "");
    }

    #[test]
    fn commit_failure_becomes_error_not_backtrackable_failure() {
        // The second alternative would match "a", but the committed failure
        // in the first one must win.
        let parser = alt(
            seq(char_parser('a'), commit("after_a", char_parser('x'))),
            char_parser('a'),
        );
        let outcome = parser(state("ab"));
        assert!(is_error(&outcome));
    }

    // ---------------------------------------------------------------
    // wrap_tokens
    // ---------------------------------------------------------------

    #[test]
    fn wrap_tokens_emits_begin_token_first() {
        let parser = wrap_tokens(Code::BeginMapping, Code::EndMapping, char_parser('a'));
        let emitted = tokens(parser(state("a")));
        assert_eq!(emitted.first().copied(), Some(Code::BeginMapping));
    }

    #[test]
    fn wrap_tokens_emits_end_token_last() {
        let parser = wrap_tokens(Code::BeginMapping, Code::EndMapping, char_parser('a'));
        let emitted = tokens(parser(state("a")));
        assert_eq!(emitted.last().copied(), Some(Code::EndMapping));
    }

    #[test]
    fn wrap_tokens_inner_tokens_are_between_begin_and_end() {
        let inner = seq(
            token(Code::Text, char_parser('h')),
            token(Code::Text, char_parser('i')),
        );
        let parser = wrap_tokens(Code::BeginScalar, Code::EndScalar, inner);
        let emitted = tokens(parser(state("hi")));
        assert_eq!(
            emitted,
            vec![Code::BeginScalar, Code::Text, Code::Text, Code::EndScalar]
        );
    }

    #[test]
    fn wrap_tokens_on_inner_failure_emits_no_tokens() {
        let parser = wrap_tokens(Code::BeginMapping, Code::EndMapping, char_parser('x'));
        let outcome = parser(state("a"));
        assert!(is_failure(&outcome));
    }

    // ---------------------------------------------------------------
    // token
    // ---------------------------------------------------------------

    #[test]
    fn token_emits_token_with_correct_code() {
        let parser = token(Code::Text, char_parser('a'));
        let emitted = tokens(parser(state("a")));
        assert_eq!(emitted, vec![Code::Text]);
    }

    #[test]
    fn token_emits_token_with_correct_position() {
        let start_pos = Pos {
            byte_offset: 5,
            char_offset: 5,
            line: 3,
            column: 2,
        };
        let parser = token(Code::Text, char_parser('a'));
        let Reply::Success {
            tokens: emitted, ..
        } = parser(state_at("a", start_pos))
        else {
            panic!("expected success")
        };
        assert_eq!(emitted.len(), 1);
        assert_eq!(emitted[0].pos, start_pos);
    }

    // ---------------------------------------------------------------
    // Position tracking
    // ---------------------------------------------------------------

    #[test]
    fn position_advances_by_byte_and_char_after_ascii_match() {
        let parser = char_parser('a');
        let Reply::Success { state: after, .. } = parser(state("ab")) else {
            panic!("expected success")
        };
        assert_eq!(after.pos.byte_offset, 1);
        assert_eq!(after.pos.char_offset, 1);
        assert_eq!(after.pos.column, 1);
        assert_eq!(after.pos.line, 1);
    }

    #[test]
    fn position_advances_correctly_after_newline() {
        let parser = char_parser('\n');
        let Reply::Success { state: after, .. } = parser(state("\n")) else {
            panic!("expected success")
        };
        assert_eq!(after.pos.line, 2);
        assert_eq!(after.pos.column, 0);
        assert_eq!(after.pos.byte_offset, 1);
    }

    #[test]
    fn position_advances_by_correct_byte_count_for_multibyte_char() {
        let parser = char_parser('é');
        let Reply::Success { state: after, .. } = parser(state("é")) else {
            panic!("expected success")
        };
        assert_eq!(after.pos.byte_offset, 2);
        assert_eq!(after.pos.char_offset, 1);
    }

    #[test]
    fn position_advances_by_correct_byte_count_for_three_byte_char() {
        let parser = char_parser('中');
        let Reply::Success { state: after, .. } = parser(state("中")) else {
            panic!("expected success")
        };
        assert_eq!(after.pos.byte_offset, 3);
        assert_eq!(after.pos.char_offset, 1);
    }

    // ---------------------------------------------------------------
    // State threading
    // ---------------------------------------------------------------

    #[test]
    fn state_carries_indentation_level() {
        let parser = char_parser('a');
        let initial = State::with_context("a", 4, Context::BlockOut);
        let Reply::Success { state: after, .. } = parser(initial) else {
            panic!("expected success")
        };
        assert_eq!(after.n, 4);
    }

    #[test]
    fn state_carries_context_mode() {
        let parser = char_parser('a');
        let initial = State::with_context("a", 0, Context::FlowIn);
        let Reply::Success { state: after, .. } = parser(initial) else {
            panic!("expected success")
        };
        assert_eq!(after.c, Context::FlowIn);
    }

    #[test]
    fn context_enum_has_all_six_variants() {
        // Exhaustive match: adding or removing a variant breaks compilation.
        let ctx = Context::BlockOut;
        let _ = match ctx {
            Context::BlockOut => 0,
            Context::BlockIn => 1,
            Context::FlowOut => 2,
            Context::FlowIn => 3,
            Context::BlockKey => 4,
            Context::FlowKey => 5,
        };
    }

    // ---------------------------------------------------------------
    // Composition
    // ---------------------------------------------------------------

    #[test]
    fn composed_combinators_parse_simple_sequence_correctly() {
        let key = many1(char_parser('a'));
        let separator = seq(char_parser(':'), many0(char_parser(' ')));
        let parser = seq(key, separator);
        let outcome = parser(state("aaa: "));
        assert_eq!(remaining(&outcome), "");
    }

    #[test]
    fn alt_of_seq_correctly_backtracks_on_partial_match() {
        // The first branch consumes 'a' before failing on 'c'; the second
        // branch must restart from the beginning.
        let parser = alt(
            seq(char_parser('a'), char_parser('b')),
            seq(char_parser('a'), char_parser('c')),
        );
        let outcome = parser(state("ac"));
        assert_eq!(remaining(&outcome), "");
    }
}