kalosm_sample/structured_parser/
string.rs

1use crate::{CreateParserState, ParseStatus, Parser};
2
/// The filter type used when no custom filter is supplied: a plain function
/// pointer that decides whether a character is allowed.
type CharFilter = fn(char) -> bool;

/// A parser for a double-quoted ASCII string.
///
/// `F` is the predicate that decides which characters may appear inside the
/// quotes; it defaults to [`CharFilter`].
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct StringParser<F = CharFilter>
where
    F: Fn(char) -> bool + 'static,
{
    /// Inclusive bounds on the length of the parsed string contents.
    len_range: std::ops::RangeInclusive<usize>,
    /// Predicate every character inside the string has to satisfy.
    character_filter: F,
}
11
12impl<F: Fn(char) -> bool + 'static> CreateParserState for StringParser<F> {
13    fn create_parser_state(&self) -> <Self as Parser>::PartialState {
14        StringParserState::default()
15    }
16}
17
18impl StringParser<fn(char) -> bool> {
19    /// Create a new string parser.
20    pub fn new(len_range: std::ops::RangeInclusive<usize>) -> Self {
21        Self {
22            len_range,
23            character_filter: |_| true,
24        }
25    }
26}
27
28impl<F: Fn(char) -> bool + 'static> StringParser<F> {
29    /// Only allow characters that pass the filter.
30    pub fn with_allowed_characters<F2: Fn(char) -> bool + 'static>(
31        self,
32        character_filter: F2,
33    ) -> StringParser<F2> {
34        StringParser {
35            len_range: self.len_range,
36            character_filter,
37        }
38    }
39
40    /// Only parse plain text that matches the character filter 'a'..'z' | 'A'..'Z' | '0'..'9' | ' ' | ',' | '.'
41    pub fn plain_text(self) -> StringParser {
42        self.with_allowed_characters(|c| {
43            matches!(
44                c,
45                'a'..='z' | 'A'..='Z' | ' ' | '0'..='9' | ',' | '.'
46            )
47        })
48    }
49
50    /// Only parse alphanumeric text and spaces (the character filter 'a'..'z' | 'A'..'Z' | '0'..'9' | ' ')
51    pub fn alphanumeric_with_spaces(self) -> StringParser {
52        self.with_allowed_characters(|c| {
53            matches!(
54                c,
55                'a'..='z' | 'A'..='Z' | '0'..='9' | ' '
56            )
57        })
58    }
59}
60
/// Which part of the quoted string the parser is currently in.
#[derive(Default, Debug, PartialEq, Eq, Clone)]
enum StringParserProgress {
    /// The opening `"` has not been consumed yet.
    #[default]
    BeforeQuote,
    /// The opening `"` has been consumed; contents are being read.
    InString,
}

/// The state of a string parser.
#[derive(Default, Debug, PartialEq, Eq, Clone)]
pub struct StringParserState {
    /// Whether the opening quote has been seen.
    progress: StringParserProgress,
    /// The text accumulated so far.
    string: String,
    /// Set when the previous byte was an unescaped backslash.
    next_char_escaped: bool,
}

impl StringParserState {
    /// Create a string parser state from already-available text.
    ///
    /// The state starts inside the string when `string` begins with `"`, and
    /// the next character is treated as escaped when `string` ends with `\`.
    /// The text itself is stored unchanged.
    pub fn new(string: String) -> Self {
        let next_char_escaped = string.ends_with('\\');
        let progress = match string.chars().next() {
            Some('"') => StringParserProgress::InString,
            _ => StringParserProgress::BeforeQuote,
        };

        Self {
            progress,
            string,
            next_char_escaped,
        }
    }
}
91
/// The error produced when input cannot be parsed as a string literal.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct StringParseError;

impl std::fmt::Display for StringParseError {
    /// Writes the fixed text `StringParseError`, honouring any width or
    /// precision requested by the formatter.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt("StringParseError", f)
    }
}

impl std::error::Error for StringParseError {}
103
104impl<F: Fn(char) -> bool + 'static> Parser for StringParser<F> {
105    type Output = String;
106    type PartialState = StringParserState;
107
108    fn parse<'a>(
109        &self,
110        state: &StringParserState,
111        input: &'a [u8],
112    ) -> crate::ParseResult<ParseStatus<'a, Self::PartialState, Self::Output>> {
113        let StringParserState {
114            mut progress,
115            mut string,
116            mut next_char_escaped,
117        } = state.clone();
118
119        for (i, byte) in input.iter().enumerate() {
120            match progress {
121                StringParserProgress::BeforeQuote => {
122                    if *byte == b'"' {
123                        progress = StringParserProgress::InString;
124                    } else {
125                        crate::bail!(StringParseError);
126                    }
127                }
128                StringParserProgress::InString => {
129                    let byte_unescaped_quote = !state.next_char_escaped && *byte == b'"';
130                    if !byte_unescaped_quote && !(self.character_filter)(*byte as char) {
131                        crate::bail!(StringParseError);
132                    }
133
134                    if string.len() == *self.len_range.end() && !byte_unescaped_quote {
135                        crate::bail!(StringParseError);
136                    }
137
138                    if next_char_escaped {
139                        next_char_escaped = false;
140                        string.push(*byte as char);
141                    } else if *byte == b'"' {
142                        if !self.len_range.contains(&string.len()) {
143                            crate::bail!(StringParseError);
144                        }
145                        return Ok(ParseStatus::Finished {
146                            remaining: &input[i + 1..],
147                            result: string,
148                        });
149                    } else if *byte == b'\\' {
150                        next_char_escaped = true;
151                    } else {
152                        string.push(*byte as char);
153                    }
154                }
155            }
156        }
157
158        Ok(ParseStatus::Incomplete {
159            new_state: StringParserState {
160                progress,
161                string,
162                next_char_escaped,
163            },
164            required_next: "".into(),
165        })
166    }
167}
168
#[test]
fn string_parser() {
    let parser = StringParser::new(1..=20);
    let initial = StringParserState::default();

    // A complete literal, including an escaped quote, parsed in one call.
    let complete = parser.parse(&initial, b"\"Hello, \\\"world!\"");
    assert_eq!(
        complete,
        Ok(ParseStatus::Finished {
            result: "Hello, \"world!".to_string(),
            remaining: &[]
        })
    );

    // A literal cut off before its closing quote reports its progress.
    let partial = parser.parse(&initial, b"\"Hello, ");
    assert_eq!(
        partial,
        Ok(ParseStatus::Incomplete {
            new_state: StringParserState {
                progress: StringParserProgress::InString,
                string: "Hello, ".to_string(),
                next_char_escaped: false,
            },
            required_next: "".into()
        })
    );

    // Resuming from that progress with the rest of the literal finishes it.
    let resumed_from = partial.unwrap().unwrap_incomplete().0;
    let resumed = parser.parse(&resumed_from, b"world!\"");
    assert_eq!(
        resumed,
        Ok(ParseStatus::Finished {
            result: "Hello, world!".to_string(),
            remaining: &[]
        })
    );
}