// kalosm_sample/structured_parser/string.rs
use crate::{CreateParserState, ParseStatus, Parser};
2
/// Default predicate type for [`StringParser`]: a plain function pointer
/// from a character to "is this character allowed".
type CharFilter = fn(char) -> bool;

/// Settings for parsing a quoted string: bounds on its length and a
/// predicate restricting which characters it may contain.
///
/// `F` defaults to a plain function pointer so the type can be named as
/// `StringParser` without spelling out a closure type.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct StringParser<F = CharFilter>
where
    F: Fn(char) -> bool + 'static,
{
    /// Inclusive lower and upper bound on the length of the parsed string.
    len_range: std::ops::RangeInclusive<usize>,
    /// Returns `true` for characters that may appear in the string.
    character_filter: F,
}
11
12impl<F: Fn(char) -> bool + 'static> CreateParserState for StringParser<F> {
13 fn create_parser_state(&self) -> <Self as Parser>::PartialState {
14 StringParserState::default()
15 }
16}
17
18impl StringParser<fn(char) -> bool> {
19 pub fn new(len_range: std::ops::RangeInclusive<usize>) -> Self {
21 Self {
22 len_range,
23 character_filter: |_| true,
24 }
25 }
26}
27
28impl<F: Fn(char) -> bool + 'static> StringParser<F> {
29 pub fn with_allowed_characters<F2: Fn(char) -> bool + 'static>(
31 self,
32 character_filter: F2,
33 ) -> StringParser<F2> {
34 StringParser {
35 len_range: self.len_range,
36 character_filter,
37 }
38 }
39
40 pub fn plain_text(self) -> StringParser {
42 self.with_allowed_characters(|c| {
43 matches!(
44 c,
45 'a'..='z' | 'A'..='Z' | ' ' | '0'..='9' | ',' | '.'
46 )
47 })
48 }
49
50 pub fn alphanumeric_with_spaces(self) -> StringParser {
52 self.with_allowed_characters(|c| {
53 matches!(
54 c,
55 'a'..='z' | 'A'..='Z' | '0'..='9' | ' '
56 )
57 })
58 }
59}
60
/// Where the parser is relative to the opening quote of the string.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
enum StringParserProgress {
    /// Initial position; no opening quote has been recorded.
    #[default]
    BeforeQuote,
    /// The opening quote has been passed.
    InString,
}

/// Resumable state of a string parse.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct StringParserState {
    /// Whether the opening quote has been passed.
    progress: StringParserProgress,
    /// Text accumulated so far.
    string: String,
    /// `true` when the previous character was an escaping backslash.
    next_char_escaped: bool,
}

impl StringParserState {
    /// Builds a state from `string`.
    ///
    /// The progress is `InString` when `string` begins with `"` and
    /// `BeforeQuote` otherwise; the next character counts as escaped when
    /// `string` ends with a backslash. `string` itself is stored unchanged.
    ///
    /// NOTE(review): a leading quote is kept in the stored text, and a
    /// trailing backslash that is itself escaped (`\\`) still marks the
    /// next character as escaped. Confirm this is what callers expect.
    pub fn new(string: String) -> Self {
        let next_char_escaped = string.ends_with('\\');
        let progress = match string.chars().next() {
            Some('"') => StringParserProgress::InString,
            _ => StringParserProgress::BeforeQuote,
        };
        Self {
            progress,
            string,
            next_char_escaped,
        }
    }
}
91
/// Error reported when input cannot be parsed as a string.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct StringParseError;

impl std::fmt::Display for StringParseError {
    /// Writes the fixed text `StringParseError`, honouring any width,
    /// alignment or precision requested by the caller.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Called by full path: method syntax (`"...".fmt(f)`) only resolves
        // when `std::fmt::Display` has been imported into this module.
        std::fmt::Display::fmt("StringParseError", f)
    }
}

impl std::error::Error for StringParseError {}
103
104impl<F: Fn(char) -> bool + 'static> Parser for StringParser<F> {
105 type Output = String;
106 type PartialState = StringParserState;
107
108 fn parse<'a>(
109 &self,
110 state: &StringParserState,
111 input: &'a [u8],
112 ) -> crate::ParseResult<ParseStatus<'a, Self::PartialState, Self::Output>> {
113 let StringParserState {
114 mut progress,
115 mut string,
116 mut next_char_escaped,
117 } = state.clone();
118
119 for (i, byte) in input.iter().enumerate() {
120 match progress {
121 StringParserProgress::BeforeQuote => {
122 if *byte == b'"' {
123 progress = StringParserProgress::InString;
124 } else {
125 crate::bail!(StringParseError);
126 }
127 }
128 StringParserProgress::InString => {
129 let byte_unescaped_quote = !state.next_char_escaped && *byte == b'"';
130 if !byte_unescaped_quote && !(self.character_filter)(*byte as char) {
131 crate::bail!(StringParseError);
132 }
133
134 if string.len() == *self.len_range.end() && !byte_unescaped_quote {
135 crate::bail!(StringParseError);
136 }
137
138 if next_char_escaped {
139 next_char_escaped = false;
140 string.push(*byte as char);
141 } else if *byte == b'"' {
142 if !self.len_range.contains(&string.len()) {
143 crate::bail!(StringParseError);
144 }
145 return Ok(ParseStatus::Finished {
146 remaining: &input[i + 1..],
147 result: string,
148 });
149 } else if *byte == b'\\' {
150 next_char_escaped = true;
151 } else {
152 string.push(*byte as char);
153 }
154 }
155 }
156 }
157
158 Ok(ParseStatus::Incomplete {
159 new_state: StringParserState {
160 progress,
161 string,
162 next_char_escaped,
163 },
164 required_next: "".into(),
165 })
166 }
167}
168
#[test]
fn string_parser() {
    let parser = StringParser::new(1..=20);
    let state = StringParserState::default();

    // A complete literal containing an escaped quote finishes in one call.
    let whole = parser.parse(&state, b"\"Hello, \\\"world!\"");
    assert_eq!(
        whole,
        Ok(ParseStatus::Finished {
            result: "Hello, \"world!".to_string(),
            remaining: &[]
        })
    );

    // Input that stops before the closing quote reports the partial state.
    let first_half = parser.parse(&state, b"\"Hello, ");
    assert_eq!(
        first_half,
        Ok(ParseStatus::Incomplete {
            new_state: StringParserState {
                progress: StringParserProgress::InString,
                string: "Hello, ".to_string(),
                next_char_escaped: false,
            },
            required_next: "".into()
        })
    );

    // Resuming from that partial state with the rest of the literal finishes.
    let resumed_state = parser
        .parse(&state, b"\"Hello, ")
        .unwrap()
        .unwrap_incomplete()
        .0;
    let second_half = parser.parse(&resumed_state, b"world!\"");
    assert_eq!(
        second_half,
        Ok(ParseStatus::Finished {
            result: "Hello, world!".to_string(),
            remaining: &[]
        })
    );
}