kalosm_sample/structured_parser/
repeat.rs

1use std::{borrow::Cow, sync::Arc};
2
3use crate::{CreateParserState, ParseStatus, Parser};
4
5use super::ArcLinkedList;
6
7/// State of a repeat parser.
8#[derive(Debug, PartialEq, Eq)]
9pub struct RepeatParserState<P: Parser> {
10    pub(crate) new_state_in_progress: bool,
11    pub(crate) last_state: P::PartialState,
12    outputs: ArcLinkedList<P::Output>,
13}
14
15impl<P: Parser> Clone for RepeatParserState<P>
16where
17    P::PartialState: Clone,
18{
19    fn clone(&self) -> Self {
20        Self {
21            new_state_in_progress: self.new_state_in_progress,
22            last_state: self.last_state.clone(),
23            outputs: self.outputs.clone(),
24        }
25    }
26}
27
28impl<P: Parser> RepeatParserState<P> {
29    /// Create a new repeat parser state.
30    pub fn new(state: P::PartialState, outputs: Vec<P::Output>) -> Self {
31        let mut outputs_ll = ArcLinkedList::default();
32        for output in outputs {
33            outputs_ll.push(Arc::new(output));
34        }
35        Self {
36            new_state_in_progress: false,
37            last_state: state,
38            outputs: outputs_ll,
39        }
40    }
41}
42
43impl<P: Parser> Default for RepeatParserState<P>
44where
45    P::PartialState: Default,
46{
47    fn default() -> Self {
48        RepeatParserState {
49            new_state_in_progress: false,
50            last_state: Default::default(),
51            outputs: Default::default(),
52        }
53    }
54}
55
/// A parser that applies one wrapped parser repeatedly and collects the
/// output of every repetition.
///
/// The number of accepted repetitions is bounded by an inclusive range.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RepeatParser<P> {
    /// The parser that is applied for each repetition.
    pub(crate) parser: P,
    /// Inclusive lower and upper bound on the number of repetitions.
    length_range: std::ops::RangeInclusive<usize>,
}
62
63impl<P> Default for RepeatParser<P>
64where
65    P: Default,
66{
67    fn default() -> Self {
68        RepeatParser {
69            parser: Default::default(),
70            length_range: 0..=usize::MAX,
71        }
72    }
73}
74
75impl<P> RepeatParser<P> {
76    /// Create a new repeat parser.
77    pub fn new(parser: P, length_range: std::ops::RangeInclusive<usize>) -> Self {
78        Self {
79            parser,
80            length_range,
81        }
82    }
83}
84
85impl<O, PA, P: Parser<Output = O, PartialState = PA> + CreateParserState> CreateParserState
86    for RepeatParser<P>
87where
88    P::PartialState: Clone,
89    P::Output: Clone,
90{
91    fn create_parser_state(&self) -> <Self as Parser>::PartialState {
92        RepeatParserState {
93            new_state_in_progress: false,
94            last_state: self.parser.create_parser_state(),
95            outputs: Default::default(),
96        }
97    }
98}
99
100impl<O, PA, P: Parser<Output = O, PartialState = PA> + CreateParserState> Parser for RepeatParser<P>
101where
102    P::PartialState: Clone,
103    P::Output: Clone,
104{
105    type Output = Vec<O>;
106    type PartialState = RepeatParserState<P>;
107
108    fn parse<'a>(
109        &self,
110        state: &Self::PartialState,
111        input: &'a [u8],
112    ) -> crate::ParseResult<ParseStatus<'a, Self::PartialState, Self::Output>> {
113        let mut state = state.clone();
114        let mut remaining = input;
115        loop {
116            let result = self.parser.parse(&state.last_state, remaining);
117            match result {
118                Ok(ParseStatus::Finished {
119                    result,
120                    remaining: new_remaining,
121                }) => {
122                    state.outputs.push(Arc::new(result));
123                    state.last_state = self.parser.create_parser_state();
124                    state.new_state_in_progress = false;
125                    remaining = new_remaining;
126                    // If this is the maximum number of times we are repeating the parser,
127                    // return the finished state immediately
128                    if self.length_range.end() == &state.outputs.len() {
129                        return Ok(ParseStatus::Finished {
130                            result: state.outputs.vec(),
131                            remaining,
132                        });
133                    }
134                    // Otherwise, if we are out of input, return an empty required next state
135                    if remaining.is_empty() {
136                        // If this is a valid place for the sequence to stop, there is no required next state
137                        // parsing an invalid sequence would be valid to stop the sequence
138                        let mut required_next = Cow::default();
139                        // Otherwise, the sequence must continue with another item
140                        // Grab the required next state from that item
141                        if !self.length_range.contains(&state.outputs.len()) {
142                            if let Ok(ParseStatus::Incomplete {
143                                required_next: new_required_next,
144                                ..
145                            }) = self.parser.parse(&state.last_state, remaining)
146                            {
147                                required_next = new_required_next;
148                            }
149                        }
150
151                        return Ok(ParseStatus::Incomplete {
152                            new_state: state,
153                            required_next,
154                        });
155                    }
156                }
157                // If the parser is incomplete, we are out of input and we need to return
158                Ok(ParseStatus::Incomplete {
159                    new_state,
160                    required_next,
161                }) => {
162                    state.last_state = new_state;
163                    state.new_state_in_progress = true;
164                    return Ok(ParseStatus::Incomplete {
165                        new_state: state,
166                        required_next,
167                    });
168                }
169                // If we fail to parse, try to end the sequence
170                // We can only end the sequence if the current state is not in progress
171                // and this is in the valid range of times to repeat
172                Err(e) => {
173                    if !state.new_state_in_progress
174                        && self.length_range.contains(&state.outputs.len())
175                    {
176                        return Ok(ParseStatus::Finished {
177                            result: state.outputs.vec(),
178                            remaining,
179                        });
180                    } else {
181                        return Err(e);
182                    }
183                }
184            }
185        }
186    }
187}
188
/// Checks finished and incomplete results of [`RepeatParser`], including the
/// `required_next` hint reported at item boundaries.
#[test]
fn repeat_parser() {
    use crate::{ArcLinkedListNode, IntegerParser, LiteralParser, ParserExt};

    // Three literals reach the upper bound of the range: finished, nothing left.
    let literal_repeat = RepeatParser::new(LiteralParser::from("a"), 1..=3);
    let initial = literal_repeat.create_parser_state();
    assert_eq!(
        literal_repeat.parse(&initial, b"aaa"),
        Ok(ParseStatus::Finished {
            result: vec![(); 3],
            remaining: b"",
        })
    );

    // Three integers reach the upper bound as well.
    let int_parser = IntegerParser::new(1..=3);
    let digit_repeat = RepeatParser::new(int_parser.clone(), 1..=3);
    let initial = digit_repeat.create_parser_state();
    assert_eq!(
        digit_repeat.parse(&initial, b"123"),
        Ok(ParseStatus::Finished {
            result: vec![1, 2, 3],
            remaining: b"",
        })
    );

    // Two integers are within the range but below the upper bound: the parser
    // stays incomplete with two outputs and nothing required next.
    let digit_repeat = RepeatParser::new(int_parser.clone(), 1..=3);
    let initial = digit_repeat.create_parser_state();
    let first = ArcLinkedListNode {
        prev: None,
        value: Arc::new(1),
    };
    let second = ArcLinkedListNode {
        prev: Some(Arc::new(first)),
        value: Arc::new(2),
    };
    let expected_state = RepeatParserState {
        new_state_in_progress: false,
        last_state: int_parser.create_parser_state(),
        outputs: ArcLinkedList {
            len: 2,
            tail: Some(second),
        },
    };
    assert_eq!(
        digit_repeat.parse(&initial, b"12"),
        Ok(ParseStatus::Incomplete {
            new_state: expected_state,
            required_next: Default::default()
        })
    );

    // It is not valid to stop the sequence here, required next must be some
    let separated_int_parser = LiteralParser::new("  ").ignore_output_then(int_parser);
    let repeat_separated_int_parser = RepeatParser::new(separated_int_parser.clone(), 3..=5);
    let initial = repeat_separated_int_parser.create_parser_state();
    let first = ArcLinkedListNode {
        prev: None,
        value: Arc::new(1),
    };
    let second = ArcLinkedListNode {
        prev: Some(Arc::new(first)),
        value: Arc::new(2),
    };
    let expected_state = RepeatParserState {
        new_state_in_progress: false,
        last_state: separated_int_parser.create_parser_state(),
        outputs: ArcLinkedList {
            len: 2,
            tail: Some(second),
        },
    };
    assert_eq!(
        repeat_separated_int_parser.parse(&initial, b"  1  2"),
        Ok(ParseStatus::Incomplete {
            new_state: expected_state,
            required_next: "  ".into()
        })
    );

    // It is valid to stop here. Required next must be none
    let initial = repeat_separated_int_parser.create_parser_state();
    let first = ArcLinkedListNode {
        prev: None,
        value: Arc::new(1),
    };
    let second = ArcLinkedListNode {
        prev: Some(Arc::new(first)),
        value: Arc::new(2),
    };
    let third = ArcLinkedListNode {
        prev: Some(Arc::new(second)),
        value: Arc::new(3),
    };
    let expected_state = RepeatParserState {
        new_state_in_progress: false,
        last_state: separated_int_parser.create_parser_state(),
        outputs: ArcLinkedList {
            len: 3,
            tail: Some(third),
        },
    };
    assert_eq!(
        repeat_separated_int_parser.parse(&initial, b"  1  2  3"),
        Ok(ParseStatus::Incomplete {
            new_state: expected_state,
            required_next: Default::default()
        })
    );
}
291}