1use bytes::Buf as _;
2
3use crate::iterable::OutputToByteStream;
4use crate::repeatable::Repeatable;
5use crate::{ByteStream, Extract, ParseAny, ParseResult, ParseWhen};
6
/// Maps every possible byte value to the length, in bytes, of the UTF-8
/// sequence that the byte would start.
///
/// * `0x00..=0x7F` (ASCII) start a 1-byte sequence.
/// * `0xC2..=0xDF` start a 2-byte sequence.
/// * `0xE0..=0xEF` start a 3-byte sequence.
/// * `0xF0..=0xF4` start a 4-byte sequence.
/// * Every other byte maps to `0`, meaning it cannot start a sequence.
///
/// The table is computed at compile time from the ranges above instead of
/// being spelled out entry by entry.
const UTF8_CHAR_WIDTH: &[u8; 256] = {
    const TABLE: [u8; 256] = {
        let mut widths = [0u8; 256];
        let mut lead = 0usize;
        while lead < 256 {
            widths[lead] = match lead {
                0x00..=0x7F => 1,
                0xC2..=0xDF => 2,
                0xE0..=0xEF => 3,
                0xF0..=0xF4 => 4,
                _ => 0,
            };
            lead += 1;
        }
        widths
    };
    &TABLE
};
26
27#[must_use]
30#[inline]
31const fn utf8_char_width(b: u8) -> usize {
32 UTF8_CHAR_WIDTH[b as usize] as usize
33}
34
35impl Extract for char {
37 type State = (u8, ByteStream);
38 type Output = ByteStream;
39
40 fn extract(
41 &self,
42 mut input: ByteStream,
43 state: Option<Self::State>,
44 last: bool,
45 ) -> ParseResult<Self::State, ByteStream> {
46 let mut char_buf = [0u8; 4];
47 let mut bytes = self.encode_utf8(&mut char_buf).as_bytes();
48 let (mut seen, mut output) = state.unwrap_or((0, ByteStream::new(input.position())));
49 bytes.advance(seen as usize);
50 let matched = input.common_prefix_length(bytes);
51 if matched == bytes.len() {
52 output.merge(input.take_before(matched));
53 ParseResult::Match(output, input)
54 } else if !last && matched == input.remaining() {
55 output.merge(input.take_before(matched));
56 seen += matched as u8;
57 ParseResult::Partial((seen, output))
58 } else {
59 ParseResult::NoMatch(output.position())
60 }
61 }
62}
63
// Marker impl: the body is empty, so `char` relies entirely on whatever
// `Repeatable` provides by default.
impl Repeatable for char {}
66
impl OutputToByteStream for char {
    /// Returns `output` unchanged; the value handed in is passed straight
    /// back as the resulting `ByteStream`.
    fn output_to_bytestream(output: Self::Output) -> ByteStream {
        output
    }
}
72
73impl Extract for &str {
75 type State = (usize, ByteStream);
76 type Output = ByteStream;
77
78 fn extract(
79 &self,
80 mut input: ByteStream,
81 state: Option<Self::State>,
82 last: bool,
83 ) -> ParseResult<Self::State, ByteStream> {
84 let mut bytes = self.as_bytes();
85 let (mut seen, mut output) = state.unwrap_or((0, ByteStream::new(input.position())));
86 bytes.advance(seen);
87 let matched = input.common_prefix_length(bytes);
88 if matched == bytes.len() {
89 output.merge(input.take_before(matched));
90 ParseResult::Match(output, input)
91 } else if !last && matched == input.remaining() {
92 output.merge(input.take_before(matched));
93 seen += matched;
94 ParseResult::Partial((seen, output))
95 } else {
96 ParseResult::NoMatch(output.position())
97 }
98 }
99}
100
// Marker impl: the body is empty, so `&str` relies entirely on whatever
// `Repeatable` provides by default.
impl Repeatable for &str {}
102
impl OutputToByteStream for &str {
    /// Returns `output` unchanged; the value handed in is passed straight
    /// back as the resulting `ByteStream`.
    fn output_to_bytestream(output: Self::Output) -> ByteStream {
        output
    }
}
108
/// Parser value returned by `<char as ParseAny>::any()`.
///
/// The type carries no data; its `Extract` implementation consumes a single
/// UTF-8 sequence from the input. The common traits are derived so the
/// parser can be copied, cloned, defaulted, and debug-printed freely.
#[derive(Clone, Copy, Debug, Default)]
pub struct AnyCharParser;
110
111impl Extract for AnyCharParser {
113 type State = (usize, ByteStream);
114 type Output = ByteStream;
115
116 fn extract(
117 &self,
118 mut input: ByteStream,
119 state: Option<Self::State>,
120 last: bool,
121 ) -> ParseResult<Self::State, ByteStream> {
122 let (mut required, mut output) = state.unwrap_or((0, ByteStream::new(input.position())));
123 if required == 0 {
124 match input.iter().next() {
125 Some(&b) => {
126 required = utf8_char_width(b);
127 if required == 0 {
128 return ParseResult::NoMatch(output.position());
129 }
130 }
131 None if last => {
132 return ParseResult::NoMatch(output.position());
133 }
134 None => {
135 return ParseResult::Partial((required, output));
136 }
137 }
138 }
139 let input_len = input.remaining();
140 if input_len < required {
141 if last {
142 ParseResult::NoMatch(output.position())
143 } else {
144 required -= input_len;
145 output.merge(input.take_before(input_len));
146 ParseResult::Partial((required, output))
147 }
148 } else {
149 output.merge(input.take_before(required));
150 ParseResult::Match(output, input)
151 }
152 }
153}
154
// Marker impl: the body is empty, so `AnyCharParser` relies entirely on
// whatever `Repeatable` provides by default.
impl Repeatable for AnyCharParser {}
157
impl OutputToByteStream for AnyCharParser {
    /// Returns `output` unchanged; the value handed in is passed straight
    /// back as the resulting `ByteStream`.
    fn output_to_bytestream(output: Self::Output) -> ByteStream {
        output
    }
}
163
/// Selects `AnyCharParser` as the parser produced by `char::any()`.
impl ParseAny for char {
    type Parser = AnyCharParser;

    /// Returns the unit value `AnyCharParser`; there is nothing to configure.
    fn any() -> Self::Parser {
        AnyCharParser
    }
}
171
/// Parser wrapping a predicate `F`, built by `<char as ParseWhen<char, F>>::when`.
///
/// Its `Extract` implementation decodes one UTF-8 character from the input
/// and matches only when the wrapped predicate returns `true` for it.
#[derive(Clone)]
pub struct CharWhenParser<F>(F);
174
175impl<F> Extract for CharWhenParser<F>
177where
178 F: Fn(char) -> bool,
179{
180 type State = (usize, ByteStream);
181 type Output = ByteStream;
182
183 fn extract(
184 &self,
185 mut input: ByteStream,
186 state: Option<Self::State>,
187 last: bool,
188 ) -> ParseResult<Self::State, ByteStream> {
189 let (mut required, mut output) = state.unwrap_or((0, ByteStream::new(input.position())));
190 if required == 0 {
191 match input.iter().next() {
192 Some(&b) => {
193 required = utf8_char_width(b);
194 if required == 0 {
195 return ParseResult::NoMatch(output.position());
196 }
197 }
198 None if last => {
199 return ParseResult::NoMatch(output.position());
200 }
201 None => {
202 return ParseResult::Partial((required, output));
203 }
204 }
205 }
206 let input_len = input.remaining();
207 if input_len < required {
208 if last {
209 ParseResult::NoMatch(output.position())
210 } else {
211 output.merge(input.take_before(input_len));
212 ParseResult::Partial((required - input_len, output))
213 }
214 } else {
215 output.merge(input.take_before(required));
216 let mut bytes = [0; 4];
217 let len = output.fill_slice(&mut bytes);
218 match std::str::from_utf8(&bytes[..len]) {
219 Ok(s) => {
220 let Some(c) = s.chars().next() else {
221 return ParseResult::NoMatch(output.position());
222 };
223 if (self.0)(c) {
224 ParseResult::Match(output, input)
225 } else {
226 ParseResult::NoMatch(output.position())
227 }
228 }
229 Err(_) => ParseResult::NoMatch(output.position()),
230 }
231 }
232 }
233}
234
// Marker impl: the body is empty, so `CharWhenParser<F>` relies entirely on
// whatever `Repeatable` provides by default.
impl<F> Repeatable for CharWhenParser<F> where F: Fn(char) -> bool {}
237
impl<F> OutputToByteStream for CharWhenParser<F>
where
    F: Fn(char) -> bool,
{
    /// Returns `output` unchanged; the value handed in is passed straight
    /// back as the resulting `ByteStream`.
    fn output_to_bytestream(output: Self::Output) -> ByteStream {
        output
    }
}
246
/// Selects `CharWhenParser<F>` as the parser produced by `char::when(f)`.
impl<F> ParseWhen<char, F> for char
where
    F: Fn(char) -> bool,
{
    type Parser = CharWhenParser<F>;

    /// Wraps the predicate `f` in a `CharWhenParser`.
    fn when(f: F) -> Self::Parser {
        CharWhenParser(f)
    }
}