rlsp_yaml_parser/
combinator.rs

1// SPDX-License-Identifier: MIT
2
3use crate::pos::Pos;
4use crate::token::{Code, Token};
5
6// ---------------------------------------------------------------------------
7// Context
8// ---------------------------------------------------------------------------
9
/// YAML 1.2 context modes (spec §6, §7, §8).
///
/// This is the `c` parameter carried by [`State`]; `State::new` starts in
/// [`Context::BlockOut`].  The type is `Copy`, so it is passed by value.
///
/// NOTE(review): the variant names presumably mirror the spec's
/// `block-out`, `block-in`, `flow-out`, `flow-in`, `block-key` and
/// `flow-key` contexts — confirm against the productions that use them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Context {
    BlockOut,
    BlockIn,
    FlowOut,
    FlowIn,
    BlockKey,
    FlowKey,
}
20
21// ---------------------------------------------------------------------------
22// State
23// ---------------------------------------------------------------------------
24
/// Immutable parser state threaded through every combinator.
///
/// `State` borrows the input slice for the lifetime `'i`.  Positions in the
/// slice are tracked via `pos`; `n` and `c` carry the YAML context parameters
/// (indentation level and context mode) through combinator composition.
///
/// Cloning a `State` copies the slice reference and the scalar fields; the
/// input text itself is never duplicated.  Combinators that may need to retry
/// from the same point clone the state before handing it to a sub-parser.
#[derive(Debug, Clone)]
pub struct State<'i> {
    /// Remaining (unconsumed) input.
    pub input: &'i str,
    /// Position of the first byte of `input` within the original document.
    pub pos: Pos,
    /// Indentation level `n` (YAML spec parameter).
    pub n: i32,
    /// Context mode `c` (YAML spec parameter).
    pub c: Context,
}
41
42impl<'i> State<'i> {
43    /// Construct a fresh state at the beginning of `input`.
44    #[must_use]
45    pub const fn new(input: &'i str) -> Self {
46        Self {
47            input,
48            pos: Pos::ORIGIN,
49            n: 0,
50            c: Context::BlockOut,
51        }
52    }
53
54    /// Construct a state with explicit context parameters.
55    #[must_use]
56    pub const fn with_context(input: &'i str, n: i32, c: Context) -> Self {
57        Self {
58            input,
59            pos: Pos::ORIGIN,
60            n,
61            c,
62        }
63    }
64
65    /// Peek at the next `char` without advancing.
66    #[must_use]
67    pub fn peek(&self) -> Option<char> {
68        self.input.chars().next()
69    }
70
71    /// Advance the state past `ch`, returning the updated state.
72    ///
73    /// The `ch` must equal the first character of `self.input`.
74    #[must_use]
75    pub fn advance(self, ch: char) -> Self {
76        let byte_len = ch.len_utf8();
77        let new_input = &self.input[byte_len..];
78        let new_pos = if ch == '\n' {
79            Pos {
80                byte_offset: self.pos.byte_offset + byte_len,
81                char_offset: self.pos.char_offset + 1,
82                line: self.pos.line + 1,
83                column: 0,
84            }
85        } else {
86            Pos {
87                byte_offset: self.pos.byte_offset + byte_len,
88                char_offset: self.pos.char_offset + 1,
89                line: self.pos.line,
90                column: self.pos.column + 1,
91            }
92        };
93        Self {
94            input: new_input,
95            pos: new_pos,
96            n: self.n,
97            c: self.c,
98        }
99    }
100}
101
102// ---------------------------------------------------------------------------
103// Reply
104// ---------------------------------------------------------------------------
105
/// The outcome of applying a parser to a `State`.
///
/// The three variants drive backtracking: `Failure` lets a caller such as
/// `alt` try another branch, whereas `Error` is passed through by the
/// combinators that honour it and ends the parse attempt.
#[derive(Debug)]
pub enum Reply<'i> {
    /// The parser matched; tokens are accumulated; state is the updated state.
    Success {
        /// Tokens emitted by the match, in input order (may be empty).
        tokens: Vec<Token<'i>>,
        /// State positioned after the matched input.
        state: State<'i>,
    },
    /// The parser did not match; no input was consumed; the state is
    /// unchanged.  The caller may try an alternative.
    Failure,
    /// The parser encountered an unrecoverable error after committing to a
    /// branch.  Alternatives are not tried — this propagates up the call
    /// stack unchanged.
    Error(ParseError),
}
122
123impl Reply<'_> {
124    const fn is_success(&self) -> bool {
125        matches!(self, Self::Success { .. })
126    }
127}
128
129// ---------------------------------------------------------------------------
130// ParseError
131// ---------------------------------------------------------------------------
132
/// A non-recoverable parse error produced after `commit`.
///
/// `commit` builds one of these when its inner parser returns `Failure`.
#[derive(Debug, Clone)]
pub struct ParseError {
    /// Position at which the committed parser was started.
    pub pos: Pos,
    /// Static label naming what was expected (the `label` given to `commit`).
    pub label: &'static str,
    /// Human-readable description; `commit` formats it as `expected {label}`.
    pub message: String,
}
140
141// ---------------------------------------------------------------------------
142// Parser type
143// ---------------------------------------------------------------------------
144
/// A parser is a function from `State` to `Reply`.
///
/// Using `Box<dyn Fn>` keeps the API simple and avoids pervasive generic
/// parameters on every combinator.  The hot-path cost is a single
/// indirection per combinator call; this is acceptable for the scaffold phase
/// and can be revisited after profiling.
///
/// The `+ 'i` bound lets the boxed closure capture other parsers or
/// predicates that live at least as long as the input borrow.  A parser takes
/// its `State` by value, so callers that need to retry from the same point
/// clone the state first.
pub type Parser<'i> = Box<dyn Fn(State<'i>) -> Reply<'i> + 'i>;
152
153// ---------------------------------------------------------------------------
154// Primitive parser builders
155// ---------------------------------------------------------------------------
156
157/// Match a single character that satisfies `predicate`.
158///
159/// On success the matched text is the UTF-8 encoding of the character and no
160/// token is emitted (use `token()` around this to produce tokens).
161#[must_use]
162pub fn satisfy<'i, F>(predicate: F) -> Parser<'i>
163where
164    F: Fn(char) -> bool + 'i,
165{
166    Box::new(move |state: State<'i>| {
167        let Some(ch) = state.peek() else {
168            return Reply::Failure;
169        };
170        if !predicate(ch) {
171            return Reply::Failure;
172        }
173        let new_state = state.advance(ch);
174        Reply::Success {
175            tokens: Vec::new(),
176            state: new_state,
177        }
178    })
179}
180
181/// Match a specific character.
182#[must_use]
183pub fn char_parser<'i>(expected: char) -> Parser<'i> {
184    satisfy(move |ch| ch == expected)
185}
186
187/// Always fail without consuming input.
188#[must_use]
189pub fn fail<'i>() -> Parser<'i> {
190    Box::new(|_state: State<'i>| Reply::Failure)
191}
192
193// ---------------------------------------------------------------------------
194// Core combinators
195// ---------------------------------------------------------------------------
196
197/// Sequence: match `a` then `b`, accumulating tokens from both.
198///
199/// If `a` fails, the whole `seq` fails with no input consumed.
200/// If `a` succeeds but `b` fails (or errors), the whole `seq` backtracks to
201/// the state before `a`.
202#[must_use]
203pub fn seq<'i>(a: Parser<'i>, b: Parser<'i>) -> Parser<'i> {
204    Box::new(move |state: State<'i>| match a(state) {
205        Reply::Failure => Reply::Failure,
206        Reply::Error(e) => Reply::Error(e),
207        Reply::Success {
208            tokens: mut tokens_a,
209            state: state_after_a,
210        } => match b(state_after_a) {
211            Reply::Failure => Reply::Failure,
212            Reply::Error(e) => Reply::Error(e),
213            Reply::Success {
214                tokens: tokens_b,
215                state: final_state,
216            } => {
217                tokens_a.extend(tokens_b);
218                Reply::Success {
219                    tokens: tokens_a,
220                    state: final_state,
221                }
222            }
223        },
224    })
225}
226
227/// Ordered alternative: try `a`; if it fails (not errors), try `b`.
228#[must_use]
229pub fn alt<'i>(a: Parser<'i>, b: Parser<'i>) -> Parser<'i> {
230    Box::new(move |state: State<'i>| {
231        // We must clone here so we can pass the same state to `b` on failure.
232        match a(state.clone()) {
233            Reply::Failure => b(state),
234            other @ (Reply::Success { .. } | Reply::Error(_)) => other,
235        }
236    })
237}
238
239/// Zero-or-more repetition: always succeeds, consuming as many matches as
240/// possible.
241#[must_use]
242pub fn many0<'i>(p: Parser<'i>) -> Parser<'i> {
243    Box::new(move |mut state: State<'i>| {
244        let mut all_tokens: Vec<Token<'i>> = Vec::new();
245        loop {
246            match p(state.clone()) {
247                Reply::Failure => {
248                    return Reply::Success {
249                        tokens: all_tokens,
250                        state,
251                    };
252                }
253                Reply::Error(e) => return Reply::Error(e),
254                Reply::Success { tokens, state: s } => {
255                    all_tokens.extend(tokens);
256                    state = s;
257                }
258            }
259        }
260    })
261}
262
263/// One-or-more repetition: fails if there is not at least one match.
264#[must_use]
265pub fn many1<'i>(p: Parser<'i>) -> Parser<'i> {
266    Box::new(move |state: State<'i>| match p(state) {
267        Reply::Failure => Reply::Failure,
268        Reply::Error(e) => Reply::Error(e),
269        Reply::Success {
270            tokens: mut first_tokens,
271            state: mut current_state,
272        } => loop {
273            match p(current_state.clone()) {
274                Reply::Failure => {
275                    return Reply::Success {
276                        tokens: first_tokens,
277                        state: current_state,
278                    };
279                }
280                Reply::Error(e) => return Reply::Error(e),
281                Reply::Success { tokens, state: s } => {
282                    first_tokens.extend(tokens);
283                    current_state = s;
284                }
285            }
286        },
287    })
288}
289
290/// Optional: always succeeds; produces an empty result if `p` fails.
291#[must_use]
292pub fn opt<'i>(p: Parser<'i>) -> Parser<'i> {
293    Box::new(move |state: State<'i>| match p(state.clone()) {
294        Reply::Failure => Reply::Success {
295            tokens: Vec::new(),
296            state,
297        },
298        other @ (Reply::Success { .. } | Reply::Error(_)) => other,
299    })
300}
301
302/// Exclusion: match `p` only if `q` does not also match at the same position.
303///
304/// Neither `p` nor `q` consume input when `q` is checked — this is a
305/// positive `p` with a negative lookahead for `q`.
306#[must_use]
307pub fn exclude<'i>(p: Parser<'i>, q: Parser<'i>) -> Parser<'i> {
308    Box::new(move |state: State<'i>| {
309        // Check q first (lookahead — no input consumed by q).
310        if q(state.clone()).is_success() {
311            return Reply::Failure;
312        }
313        p(state)
314    })
315}
316
317/// Positive lookahead: succeeds if `p` would succeed, but consumes no input
318/// and emits no tokens.
319#[must_use]
320pub fn lookahead<'i>(p: Parser<'i>) -> Parser<'i> {
321    Box::new(move |state: State<'i>| match p(state.clone()) {
322        Reply::Success { .. } => Reply::Success {
323            tokens: Vec::new(),
324            state,
325        },
326        Reply::Failure => Reply::Failure,
327        Reply::Error(e) => Reply::Error(e),
328    })
329}
330
331/// Negative lookahead: succeeds if `p` would *fail*, consumes no input, and
332/// emits no tokens.
333#[must_use]
334pub fn neg_lookahead<'i>(p: Parser<'i>) -> Parser<'i> {
335    Box::new(move |state: State<'i>| match p(state.clone()) {
336        Reply::Success { .. } => Reply::Failure,
337        Reply::Failure => Reply::Success {
338            tokens: Vec::new(),
339            state,
340        },
341        // Propagate errors unchanged even in negative lookahead.
342        Reply::Error(e) => Reply::Error(e),
343    })
344}
345
346/// Commit (cut): run `p` and convert any `Failure` into an `Error`,
347/// preventing backtracking past this point.
348#[must_use]
349pub fn commit<'i>(label: &'static str, p: Parser<'i>) -> Parser<'i> {
350    Box::new(move |state: State<'i>| {
351        let pos = state.pos;
352        match p(state) {
353            Reply::Failure => Reply::Error(ParseError {
354                pos,
355                label,
356                message: format!("expected {label}"),
357            }),
358            other @ (Reply::Success { .. } | Reply::Error(_)) => other,
359        }
360    })
361}
362
363/// Emit a `Begin`/`End` token pair around the tokens produced by `p`.
364///
365/// If `p` fails or errors, no tokens are emitted (no orphaned Begin token).
366#[must_use]
367pub fn wrap_tokens<'i>(begin: Code, end: Code, p: Parser<'i>) -> Parser<'i> {
368    Box::new(move |state: State<'i>| {
369        let begin_pos = state.pos;
370        match p(state) {
371            Reply::Failure => Reply::Failure,
372            Reply::Error(e) => Reply::Error(e),
373            Reply::Success {
374                tokens: inner,
375                state: final_state,
376            } => {
377                let end_pos = final_state.pos;
378                let mut tokens = Vec::with_capacity(inner.len() + 2);
379                tokens.push(Token {
380                    code: begin,
381                    pos: begin_pos,
382                    text: "",
383                });
384                tokens.extend(inner);
385                tokens.push(Token {
386                    code: end,
387                    pos: end_pos,
388                    text: "",
389                });
390                Reply::Success {
391                    tokens,
392                    state: final_state,
393                }
394            }
395        }
396    })
397}
398
399/// Emit a single token with `code` for the text consumed by `p`.
400///
401/// The token's position is the position at the start of `p`'s match and the
402/// text is the slice of input consumed.
403#[must_use]
404pub fn token<'i>(code: Code, p: Parser<'i>) -> Parser<'i> {
405    Box::new(move |state: State<'i>| {
406        let start_pos = state.pos;
407        let start_input = state.input;
408        match p(state) {
409            Reply::Failure => Reply::Failure,
410            Reply::Error(e) => Reply::Error(e),
411            Reply::Success {
412                state: final_state, ..
413            } => {
414                let consumed_bytes = final_state.pos.byte_offset - start_pos.byte_offset;
415                let text = &start_input[..consumed_bytes];
416                Reply::Success {
417                    tokens: vec![Token {
418                        code,
419                        pos: start_pos,
420                        text,
421                    }],
422                    state: final_state,
423                }
424            }
425        }
426    })
427}
428
429// ---------------------------------------------------------------------------
430// Tests
431// ---------------------------------------------------------------------------
432
#[cfg(test)]
#[allow(clippy::indexing_slicing, clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;

    // -----------------------------------------------------------------------
    // Helpers
    // -----------------------------------------------------------------------

    /// Fresh state at the document origin (`n = 0`, `BlockOut`).
    fn state(input: &str) -> State<'_> {
        State::new(input)
    }

    /// State with an explicit starting position, for position-tracking tests.
    fn state_at(input: &str, pos: Pos) -> State<'_> {
        State {
            input,
            pos,
            n: 0,
            c: Context::BlockOut,
        }
    }

    /// Unconsumed input of a successful reply; panics on anything else.
    fn remaining<'a>(reply: &'a Reply<'a>) -> &'a str {
        let Reply::Success { state, .. } = reply else {
            panic!("expected success")
        };
        state.input
    }

    /// Final state of a successful reply; panics on anything else.
    fn success_state(reply: Reply<'_>) -> State<'_> {
        let Reply::Success { state, .. } = reply else {
            panic!("expected success")
        };
        state
    }

    /// Token codes of a successful reply, in order; panics on anything else.
    fn tokens(reply: Reply<'_>) -> Vec<Code> {
        let Reply::Success { tokens, .. } = reply else {
            panic!("expected success")
        };
        tokens.iter().map(|emitted| emitted.code).collect()
    }

    fn is_failure(reply: &Reply<'_>) -> bool {
        match reply {
            Reply::Failure => true,
            Reply::Success { .. } | Reply::Error(_) => false,
        }
    }

    fn is_error(reply: &Reply<'_>) -> bool {
        match reply {
            Reply::Error(_) => true,
            Reply::Success { .. } | Reply::Failure => false,
        }
    }

    // -----------------------------------------------------------------------
    // seq
    // -----------------------------------------------------------------------

    #[test]
    fn seq_matches_both_parsers_in_order() {
        let parser = seq(char_parser('a'), char_parser('b'));
        let outcome = parser(state("ab"));
        assert_eq!(remaining(&outcome), "");
    }

    #[test]
    fn seq_fails_when_first_parser_fails() {
        let parser = seq(char_parser('x'), char_parser('b'));
        let outcome = parser(state("ab"));
        assert!(is_failure(&outcome));
    }

    #[test]
    fn seq_fails_when_second_parser_fails() {
        let parser = seq(char_parser('a'), char_parser('x'));
        let outcome = parser(state("ab"));
        assert!(is_failure(&outcome));
    }

    #[test]
    fn seq_on_empty_input_fails_when_non_empty_expected() {
        let parser = seq(char_parser('a'), char_parser('b'));
        let outcome = parser(state(""));
        assert!(is_failure(&outcome));
    }

    // -----------------------------------------------------------------------
    // alt
    // -----------------------------------------------------------------------

    #[test]
    fn alt_matches_first_alternative() {
        let parser = alt(char_parser('a'), char_parser('b'));
        let outcome = parser(state("a"));
        assert_eq!(remaining(&outcome), "");
    }

    #[test]
    fn alt_falls_through_to_second_when_first_fails() {
        let parser = alt(char_parser('a'), char_parser('b'));
        let outcome = parser(state("b"));
        assert_eq!(remaining(&outcome), "");
    }

    #[test]
    fn alt_fails_when_both_alternatives_fail() {
        let parser = alt(char_parser('a'), char_parser('b'));
        let outcome = parser(state("c"));
        assert!(is_failure(&outcome));
    }

    #[test]
    fn alt_does_not_try_second_when_first_matches() {
        // The second branch yields an Error (not a Failure) whenever it runs,
        // so a Success here proves that alt never invoked it.
        let parser = alt(
            char_parser('a'),
            Box::new(|_s: State<'_>| {
                Reply::Error(ParseError {
                    pos: Pos::ORIGIN,
                    label: "should not be tried",
                    message: "alt tried second branch after first succeeded".into(),
                })
            }),
        );
        let outcome = parser(state("a"));
        assert!(matches!(outcome, Reply::Success { .. }));
    }

    // -----------------------------------------------------------------------
    // many0
    // -----------------------------------------------------------------------

    #[test]
    fn many0_matches_zero_occurrences() {
        let parser = many0(char_parser('a'));
        let outcome = parser(state("b"));
        assert_eq!(remaining(&outcome), "b");
    }

    #[test]
    fn many0_matches_multiple_occurrences() {
        let parser = many0(char_parser('a'));
        let outcome = parser(state("aaab"));
        assert_eq!(remaining(&outcome), "b");
    }

    #[test]
    fn many0_on_empty_input_succeeds_with_empty_result() {
        let parser = many0(char_parser('a'));
        let outcome = parser(state(""));
        assert_eq!(remaining(&outcome), "");
    }

    // -----------------------------------------------------------------------
    // many1
    // -----------------------------------------------------------------------

    #[test]
    fn many1_fails_when_no_occurrences() {
        let parser = many1(char_parser('a'));
        let outcome = parser(state("b"));
        assert!(is_failure(&outcome));
    }

    #[test]
    fn many1_matches_single_occurrence() {
        let parser = many1(char_parser('a'));
        let outcome = parser(state("ab"));
        assert_eq!(remaining(&outcome), "b");
    }

    #[test]
    fn many1_matches_multiple_occurrences() {
        let parser = many1(char_parser('a'));
        let outcome = parser(state("aaab"));
        assert_eq!(remaining(&outcome), "b");
    }

    // -----------------------------------------------------------------------
    // opt
    // -----------------------------------------------------------------------

    #[test]
    fn opt_returns_success_when_parser_matches() {
        let parser = opt(char_parser('a'));
        let outcome = parser(state("ab"));
        assert_eq!(remaining(&outcome), "b");
    }

    #[test]
    fn opt_returns_success_when_parser_does_not_match() {
        let parser = opt(char_parser('a'));
        let outcome = parser(state("b"));
        assert!(matches!(&outcome, Reply::Success { .. }));
        assert_eq!(remaining(&outcome), "b");
    }

    #[test]
    fn opt_always_succeeds_on_empty_input() {
        let parser = opt(char_parser('a'));
        let outcome = parser(state(""));
        assert!(matches!(&outcome, Reply::Success { .. }));
    }

    // -----------------------------------------------------------------------
    // exclude
    // -----------------------------------------------------------------------

    #[test]
    fn exclude_succeeds_when_p_matches_and_q_does_not() {
        let parser = exclude(char_parser('a'), char_parser('b'));
        let outcome = parser(state("a"));
        assert_eq!(remaining(&outcome), "");
    }

    #[test]
    fn exclude_fails_when_both_p_and_q_match() {
        // p and q are both 'a', so they match the very same input.
        let parser = exclude(char_parser('a'), char_parser('a'));
        let outcome = parser(state("a"));
        assert!(is_failure(&outcome));
    }

    #[test]
    fn exclude_fails_when_p_does_not_match() {
        let parser = exclude(char_parser('a'), char_parser('b'));
        let outcome = parser(state("b"));
        assert!(is_failure(&outcome));
    }

    // -----------------------------------------------------------------------
    // lookahead
    // -----------------------------------------------------------------------

    #[test]
    fn lookahead_succeeds_without_consuming_input() {
        let parser = lookahead(char_parser('a'));
        let outcome = parser(state("abc"));
        assert_eq!(remaining(&outcome), "abc");
    }

    #[test]
    fn lookahead_fails_when_parser_fails() {
        let parser = lookahead(char_parser('x'));
        let outcome = parser(state("abc"));
        assert!(is_failure(&outcome));
    }

    // -----------------------------------------------------------------------
    // neg_lookahead
    // -----------------------------------------------------------------------

    #[test]
    fn neg_lookahead_succeeds_when_parser_fails() {
        let parser = neg_lookahead(char_parser('x'));
        let outcome = parser(state("abc"));
        assert!(matches!(&outcome, Reply::Success { .. }));
        assert_eq!(remaining(&outcome), "abc");
    }

    #[test]
    fn neg_lookahead_fails_when_parser_succeeds() {
        let parser = neg_lookahead(char_parser('a'));
        let outcome = parser(state("abc"));
        assert!(is_failure(&outcome));
    }

    // -----------------------------------------------------------------------
    // commit
    // -----------------------------------------------------------------------

    #[test]
    fn commit_succeeds_and_inner_parser_output_is_preserved() {
        let parser = commit("char_a", char_parser('a'));
        let outcome = parser(state("a"));
        assert_eq!(remaining(&outcome), "");
    }

    #[test]
    fn commit_failure_becomes_error_not_backtrackable_failure() {
        // alt(seq('a', commit("after_a", 'x')), 'a') on "ab":
        // the first branch matches 'a', then the committed 'x' fails and
        // becomes an Error, so alt must not fall back to the second branch.
        let parser = alt(
            seq(char_parser('a'), commit("after_a", char_parser('x'))),
            char_parser('a'),
        );
        let outcome = parser(state("ab"));
        assert!(is_error(&outcome));
    }

    // -----------------------------------------------------------------------
    // wrap_tokens
    // -----------------------------------------------------------------------

    #[test]
    fn wrap_tokens_emits_begin_token_first() {
        let parser = wrap_tokens(Code::BeginMapping, Code::EndMapping, char_parser('a'));
        let codes = tokens(parser(state("a")));
        assert_eq!(codes.first().copied(), Some(Code::BeginMapping));
    }

    #[test]
    fn wrap_tokens_emits_end_token_last() {
        let parser = wrap_tokens(Code::BeginMapping, Code::EndMapping, char_parser('a'));
        let codes = tokens(parser(state("a")));
        assert_eq!(codes.last().copied(), Some(Code::EndMapping));
    }

    #[test]
    fn wrap_tokens_inner_tokens_are_between_begin_and_end() {
        // token() supplies one inner token each for 'h' and 'i'.
        let inner = seq(
            token(Code::Text, char_parser('h')),
            token(Code::Text, char_parser('i')),
        );
        let parser = wrap_tokens(Code::BeginScalar, Code::EndScalar, inner);
        let codes = tokens(parser(state("hi")));
        assert_eq!(
            codes,
            vec![Code::BeginScalar, Code::Text, Code::Text, Code::EndScalar]
        );
    }

    #[test]
    fn wrap_tokens_on_inner_failure_emits_no_tokens() {
        let parser = wrap_tokens(Code::BeginMapping, Code::EndMapping, char_parser('x'));
        let outcome = parser(state("a"));
        assert!(is_failure(&outcome));
    }

    // -----------------------------------------------------------------------
    // token
    // -----------------------------------------------------------------------

    #[test]
    fn token_emits_token_with_correct_code() {
        let parser = token(Code::Text, char_parser('a'));
        let codes = tokens(parser(state("a")));
        assert_eq!(codes, vec![Code::Text]);
    }

    #[test]
    fn token_emits_token_with_correct_position() {
        let start_pos = Pos {
            byte_offset: 5,
            char_offset: 5,
            line: 3,
            column: 2,
        };
        let parser = token(Code::Text, char_parser('a'));
        let outcome = parser(state_at("a", start_pos));
        let Reply::Success {
            tokens: emitted, ..
        } = outcome
        else {
            panic!("expected success")
        };
        assert_eq!(emitted.len(), 1);
        assert_eq!(emitted[0].pos, start_pos);
    }

    // -----------------------------------------------------------------------
    // Position tracking
    // -----------------------------------------------------------------------

    #[test]
    fn position_advances_by_byte_and_char_after_ascii_match() {
        let parser = char_parser('a');
        let end = success_state(parser(state("ab")));
        assert_eq!(end.pos.byte_offset, 1);
        assert_eq!(end.pos.char_offset, 1);
        assert_eq!(end.pos.column, 1);
        assert_eq!(end.pos.line, 1);
    }

    #[test]
    fn position_advances_correctly_after_newline() {
        let parser = char_parser('\n');
        let end = success_state(parser(state("\n")));
        assert_eq!(end.pos.line, 2);
        assert_eq!(end.pos.column, 0);
        assert_eq!(end.pos.byte_offset, 1);
    }

    #[test]
    fn position_advances_by_correct_byte_count_for_multibyte_char() {
        // 'é' (U+00E9) occupies 2 bytes in UTF-8.
        let parser = char_parser('é');
        let end = success_state(parser(state("é")));
        assert_eq!(end.pos.byte_offset, 2);
        assert_eq!(end.pos.char_offset, 1);
    }

    #[test]
    fn position_advances_by_correct_byte_count_for_three_byte_char() {
        // '中' (U+4E2D) occupies 3 bytes in UTF-8.
        let parser = char_parser('中');
        let end = success_state(parser(state("中")));
        assert_eq!(end.pos.byte_offset, 3);
        assert_eq!(end.pos.char_offset, 1);
    }

    // -----------------------------------------------------------------------
    // Context threading
    // -----------------------------------------------------------------------

    #[test]
    fn state_carries_indentation_level() {
        let parser = char_parser('a');
        let start = State::with_context("a", 4, Context::BlockOut);
        let end = success_state(parser(start));
        assert_eq!(end.n, 4);
    }

    #[test]
    fn state_carries_context_mode() {
        let parser = char_parser('a');
        let start = State::with_context("a", 0, Context::FlowIn);
        let end = success_state(parser(start));
        assert_eq!(end.c, Context::FlowIn);
    }

    #[test]
    fn context_enum_has_all_six_variants() {
        // The exhaustive match stops compiling if a variant is added or removed.
        let ctx = Context::BlockOut;
        let _ = match ctx {
            Context::BlockOut => 0,
            Context::BlockIn => 1,
            Context::FlowOut => 2,
            Context::FlowIn => 3,
            Context::BlockKey => 4,
            Context::FlowKey => 5,
        };
    }

    // -----------------------------------------------------------------------
    // Combinator composition
    // -----------------------------------------------------------------------

    #[test]
    fn composed_combinators_parse_simple_sequence_correctly() {
        // seq(many1('a'), seq(':', many0(' '))) applied to "aaa: ".
        let key = many1(char_parser('a'));
        let separator = seq(char_parser(':'), many0(char_parser(' ')));
        let parser = seq(key, separator);
        let outcome = parser(state("aaa: "));
        assert_eq!(remaining(&outcome), "");
    }

    #[test]
    fn alt_of_seq_correctly_backtracks_on_partial_match() {
        // alt(seq('a','b'), seq('a','c')) on "ac": the first branch fails
        // after matching 'a'; the second branch then succeeds from the start.
        let parser = alt(
            seq(char_parser('a'), char_parser('b')),
            seq(char_parser('a'), char_parser('c')),
        );
        let outcome = parser(state("ac"));
        assert_eq!(remaining(&outcome), "");
    }
}
rlsp_yaml_parser/combinator.rs

rlsp_yaml_parser/
combinator.rs