kalosm_sample/structured_parser/
separated.rs

1use std::{borrow::Cow, sync::Arc};
2
3use crate::{CreateParserState, ParseStatus, Parser};
4
5use super::ArcLinkedList;
6
/// Which half of a separated sequence is currently being parsed.
///
/// Each variant carries the in-progress state for that half; within this file
/// the payloads are the partial states of the item and separator parsers.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum SeparatedItemState<Item, Separator> {
    /// An item is in progress.
    Item(Item),
    /// A separator is in progress.
    Separator(Separator),
}
15
/// Partial state of a [`SeparatedParser`]: the items parsed so far plus the
/// state of whichever sub-parser (item or separator) is currently active.
#[derive(Debug, PartialEq, Eq)]
pub struct SeparatedParserState<P: Parser, S: Parser> {
    // Set to `true` when the active sub-parser has returned `Incomplete`, and
    // reset to `false` whenever an item or separator finishes. A failing item
    // may only end the sequence while this flag is `false`.
    pub(crate) new_state_in_progress: bool,
    // State of the item or separator that is currently being parsed.
    pub(crate) last_state: SeparatedItemState<P::PartialState, S::PartialState>,
    // Outputs of every item that has been fully parsed so far, oldest first.
    pub(crate) outputs: ArcLinkedList<P::Output>,
}
23
24impl<P: Parser, S: Parser> Clone for SeparatedParserState<P, S>
25where
26    P::PartialState: Clone,
27    S::PartialState: Clone,
28{
29    fn clone(&self) -> Self {
30        Self {
31            new_state_in_progress: self.new_state_in_progress,
32            last_state: self.last_state.clone(),
33            outputs: self.outputs.clone(),
34        }
35    }
36}
37
38impl<P: Parser, S: Parser> SeparatedParserState<P, S> {
39    /// Create a new repeat parser state.
40    pub fn new(
41        state: SeparatedItemState<P::PartialState, S::PartialState>,
42        outputs: Vec<P::Output>,
43    ) -> Self {
44        let mut outputs_ll = ArcLinkedList::default();
45        for output in outputs {
46            outputs_ll.push(Arc::new(output));
47        }
48        Self {
49            new_state_in_progress: false,
50            last_state: state,
51            outputs: outputs_ll,
52        }
53    }
54}
55
56impl<P: Parser, S: Parser> Default for SeparatedParserState<P, S>
57where
58    P::PartialState: Default,
59{
60    fn default() -> Self {
61        SeparatedParserState {
62            new_state_in_progress: false,
63            last_state: SeparatedItemState::Item(Default::default()),
64            outputs: Default::default(),
65        }
66    }
67}
68
/// A parser for a sequence of items with a separator between consecutive
/// items (`item (separator item)*`), producing the outputs of the items.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct SeparatedParser<P, S> {
    // Parser used for each item in the sequence.
    pub(crate) parser: P,
    // Parser used for the separator between items; its output is discarded.
    pub(crate) separator: S,
    // Inclusive range of allowed item counts: parsing finishes as soon as the
    // upper bound is reached, and the sequence may only stop on a failed
    // item/separator while the current count lies inside the range.
    length_range: std::ops::RangeInclusive<usize>,
}
76
77impl<P, S> Default for SeparatedParser<P, S>
78where
79    P: Default,
80    S: Default,
81{
82    fn default() -> Self {
83        SeparatedParser {
84            parser: Default::default(),
85            separator: Default::default(),
86            length_range: 0..=usize::MAX,
87        }
88    }
89}
90
91impl<P, S> SeparatedParser<P, S> {
92    /// Create a new repeat parser.
93    pub fn new(parser: P, separator: S, length_range: std::ops::RangeInclusive<usize>) -> Self {
94        Self {
95            parser,
96            separator,
97            length_range,
98        }
99    }
100}
101
102impl<P: CreateParserState, S: CreateParserState> CreateParserState for SeparatedParser<P, S> {
103    fn create_parser_state(&self) -> <Self as Parser>::PartialState {
104        SeparatedParserState {
105            new_state_in_progress: false,
106            last_state: SeparatedItemState::Item(self.parser.create_parser_state()),
107            outputs: Default::default(),
108        }
109    }
110}
111
112impl<P: CreateParserState, S: CreateParserState> Parser for SeparatedParser<P, S> {
113    type Output = Vec<P::Output>;
114    type PartialState = SeparatedParserState<P, S>;
115
116    fn parse<'a>(
117        &self,
118        state: &Self::PartialState,
119        input: &'a [u8],
120    ) -> crate::ParseResult<ParseStatus<'a, Self::PartialState, Self::Output>> {
121        let mut state = state.clone();
122        let mut remaining = input;
123        let required_next;
124        loop {
125            match &state.last_state {
126                SeparatedItemState::Item(item_state) => {
127                    let result = self.parser.parse(item_state, remaining);
128                    match result {
129                        Ok(ParseStatus::Finished {
130                            result,
131                            remaining: new_remaining,
132                        }) => {
133                            state.outputs.push(Arc::new(result));
134                            let separator_state = self.separator.create_parser_state();
135                            state.new_state_in_progress = false;
136                            remaining = new_remaining;
137                            if self.length_range.end() == &state.outputs.len() {
138                                return Ok(ParseStatus::Finished {
139                                    result: state.outputs.vec(),
140                                    remaining,
141                                });
142                            }
143                            if remaining.is_empty() {
144                                // If this is a valid place for the sequence to stop, there is no required next state
145                                // parsing an invalid sequence would be valid to stop the sequence
146                                let mut required_next = Cow::default();
147                                // Otherwise, the sequence must continue with another item
148                                // Grab the required next state from that item
149                                if !self.length_range.contains(&state.outputs.len()) {
150                                    if let Ok(ParseStatus::Incomplete {
151                                        required_next: new_required_next,
152                                        ..
153                                    }) = self.separator.parse(&separator_state, remaining)
154                                    {
155                                        required_next = new_required_next;
156                                    }
157                                }
158                                state.last_state = SeparatedItemState::Separator(separator_state);
159
160                                return Ok(ParseStatus::Incomplete {
161                                    new_state: state,
162                                    required_next,
163                                });
164                            }
165                            state.last_state = SeparatedItemState::Separator(separator_state);
166                        }
167                        Ok(ParseStatus::Incomplete {
168                            new_state,
169                            required_next: new_required_next,
170                        }) => {
171                            state.last_state = SeparatedItemState::Item(new_state);
172                            state.new_state_in_progress = true;
173                            required_next = Some(new_required_next);
174                            break;
175                        }
176                        Err(e) => {
177                            if !state.new_state_in_progress
178                                && self.length_range.contains(&state.outputs.len())
179                            {
180                                return Ok(ParseStatus::Finished {
181                                    result: state.outputs.vec(),
182                                    remaining,
183                                });
184                            } else {
185                                crate::bail!(e);
186                            }
187                        }
188                    }
189                }
190                SeparatedItemState::Separator(separator_state) => {
191                    let result = self.separator.parse(separator_state, remaining);
192                    match result {
193                        Ok(ParseStatus::Finished {
194                            remaining: new_remaining,
195                            ..
196                        }) => {
197                            let item_state = self.parser.create_parser_state();
198                            state.new_state_in_progress = false;
199                            remaining = new_remaining;
200                            if self.length_range.end() == &state.outputs.len() {
201                                return Ok(ParseStatus::Finished {
202                                    result: state.outputs.vec(),
203                                    remaining,
204                                });
205                            }
206                            if remaining.is_empty() {
207                                match self.parser.parse(&item_state, remaining) {
208                                    Ok(ParseStatus::Incomplete {
209                                        required_next: new_required_next,
210                                        ..
211                                    }) => required_next = Some(new_required_next),
212                                    _ => required_next = None,
213                                }
214                                break;
215                            }
216                            state.last_state = SeparatedItemState::Item(item_state);
217                        }
218                        Ok(ParseStatus::Incomplete {
219                            new_state,
220                            required_next: new_required_next,
221                        }) => {
222                            state.last_state = SeparatedItemState::Separator(new_state);
223                            state.new_state_in_progress = true;
224                            required_next = Some(new_required_next);
225                            break;
226                        }
227                        Err(e) => {
228                            if self.length_range.contains(&state.outputs.len()) {
229                                return Ok(ParseStatus::Finished {
230                                    result: state.outputs.vec(),
231                                    remaining,
232                                });
233                            } else {
234                                crate::bail!(e);
235                            }
236                        }
237                    }
238                }
239            }
240        }
241
242        Ok(ParseStatus::Incomplete {
243            new_state: state,
244            required_next: required_next.unwrap_or_default(),
245        })
246    }
247}
248
#[test]
fn repeat_parser() {
    use crate::{
        ArcLinkedListNode, CreateParserState, IntegerParser, LiteralParser, LiteralParserOffset,
    };

    // Literal items: parsing finishes as soon as the upper bound of three items is reached.
    let letters = SeparatedParser::new(LiteralParser::from("a"), LiteralParser::from("b"), 1..=3);
    let initial = letters.create_parser_state();
    assert_eq!(
        letters.parse(&initial, b"ababa"),
        Ok(ParseStatus::Finished {
            result: vec![(); 3],
            remaining: b"",
        })
    );

    // Integer items: the item outputs are collected in order.
    let numbers = SeparatedParser::new(IntegerParser::new(1..=3), LiteralParser::from("b"), 1..=3);
    let initial = numbers.create_parser_state();
    assert_eq!(
        numbers.parse(&initial, b"1b2b3"),
        Ok(ParseStatus::Finished {
            result: vec![1, 2, 3],
            remaining: b"",
        })
    );

    // The input ends in the middle of a two-byte separator, so the parser is incomplete
    // and the rest of the separator is required next.
    let wide_separator =
        SeparatedParser::new(IntegerParser::new(1..=3), LiteralParser::from("bb"), 1..=3);
    let initial = wide_separator.create_parser_state();
    let expected_outputs = ArcLinkedList {
        len: 2,
        tail: Some(ArcLinkedListNode {
            prev: Some(Arc::new(ArcLinkedListNode {
                prev: None,
                value: Arc::new(1),
            })),
            value: Arc::new(2),
        }),
    };
    assert_eq!(
        wide_separator.parse(&initial, b"1bb2b"),
        Ok(ParseStatus::Incomplete {
            new_state: SeparatedParserState {
                new_state_in_progress: true,
                last_state: SeparatedItemState::Separator(LiteralParserOffset::new(1)),
                outputs: expected_outputs,
            },
            required_next: "b".into()
        })
    );

    // There must be at least three numbers, which means the separator is required next
    let at_least_three =
        SeparatedParser::new(IntegerParser::new(1..=3), LiteralParser::from("b"), 3..=5);
    let initial = at_least_three.create_parser_state();
    match at_least_three.parse(&initial, b"1b2").unwrap() {
        ParseStatus::Incomplete { required_next, .. } => assert_eq!(required_next, "b"),
        _ => panic!("expected incomplete"),
    }

    // If we already parsed the required number of items, the separator is not required next
    let at_least_three =
        SeparatedParser::new(IntegerParser::new(1..=3), LiteralParser::from("b"), 3..=5);
    let initial = at_least_three.create_parser_state();
    match at_least_three.parse(&initial, b"1b2b3").unwrap() {
        ParseStatus::Incomplete { required_next, .. } => assert_eq!(required_next, ""),
        _ => panic!("expected incomplete"),
    }
}