1use super::parser;
4use super::{Error, Result};
5use crate::range::RangeArgument;
6use crate::set::Set;
7use bstr::decode_utf8;
8use std::fmt::Debug;
9use std::ops::{Add, BitOr, Mul, Neg, Not, Shr, Sub};
10use std::str;
11
/// UTF-8 flavoured parser producing values of type `O`.
///
/// This is a newtype around the byte-level [`parser::Parser`] whose input
/// symbol type is fixed to `u8`.
pub struct Parser<'a, O>(parser::Parser<'a, u8, O>);
17
18impl<'a, O> Parser<'a, O> {
19 pub fn new<P>(parse: P) -> Self
21 where
22 P: Fn(&'a [u8], usize) -> Result<(O, usize)> + 'a,
23 {
24 Self(parser::Parser::new(parse))
25 }
26
27 pub fn collect(self) -> Parser<'a, &'a str>
30 where
31 O: 'a,
32 {
33 Parser(self.0.collect().map(
34 |s| unsafe { str::from_utf8_unchecked(s) },
36 ))
37 }
38
39 pub fn parse(&self, input: &'a [u8]) -> Result<O> {
43 self.0.parse(input)
44 }
45
46 pub fn parse_at(&self, input: &'a [u8], start: usize) -> Result<(O, usize)> {
48 self.0.parse_at(input, start)
49 }
50
51 pub fn parse_str(&self, input: &'a str) -> Result<O> {
53 self.0.parse(input.as_bytes())
54 }
55
56 pub fn map<U, F>(self, f: F) -> Parser<'a, U>
58 where
59 F: Fn(O) -> U + 'a,
60 O: 'a,
61 U: 'a,
62 {
63 Parser(self.0.map(f))
64 }
65
66 pub fn convert<U, E, F>(self, f: F) -> Parser<'a, U>
68 where
69 F: Fn(O) -> ::std::result::Result<U, E> + 'a,
70 E: Debug,
71 O: 'a,
72 U: 'a,
73 {
74 Parser(self.0.convert(f))
75 }
76
77 pub fn cache(self) -> Self
79 where
80 O: Clone + 'a,
81 {
82 Self(self.0.cache())
83 }
84
85 pub fn pos(self) -> Parser<'a, usize>
87 where
88 O: 'a,
89 {
90 Parser(self.0.pos())
91 }
92
93 pub fn discard(self) -> Parser<'a, ()>
95 where
96 O: 'a,
97 {
98 Parser(self.0.discard())
99 }
100
101 pub fn opt(self) -> Parser<'a, Option<O>>
103 where
104 O: 'a,
105 {
106 Parser(self.0.opt())
107 }
108
109 pub fn repeat<R>(self, range: R) -> Parser<'a, Vec<O>>
114 where
115 R: RangeArgument<usize> + Debug + 'a,
116 O: 'a,
117 {
118 Parser(self.0.repeat(range))
119 }
120
121 pub fn name(self, name: &'a str) -> Self
123 where
124 O: 'a,
125 {
126 Self(self.0.name(name))
127 }
128
129 pub fn expect(self, name: &'a str) -> Self
131 where
132 O: 'a,
133 {
134 Self(self.0.expect(name))
135 }
136}
137
138impl<'a, O> From<Parser<'a, O>> for parser::Parser<'a, u8, O> {
139 fn from(parser: Parser<'a, O>) -> Self {
140 parser.0 }
142}
143
144pub fn decode(slice: &[u8], start: usize) -> Result<(char, usize)> {
145 let (ch, size) = decode_utf8(&slice[start..]);
146 let Some(ch) = ch else {
147 return no_utf8(start, size);
148 };
149 Ok((ch, size))
150}
151
152fn no_utf8<T>(start: usize, size: usize) -> Result<T> {
154 Err(Error::Mismatch {
155 message: if size == 0 {
156 "end of input reached"
157 } else {
158 "not UTF-8"
159 }
160 .to_owned(),
161 position: start,
162 })
163}
164
165pub fn any<'a>() -> Parser<'a, char> {
167 Parser::new(|input: &[u8], start: usize| {
168 let (ch, size) = decode(input, start)?;
169 let pos = start + size;
170 Ok((ch, pos))
171 })
172}
173
174pub fn sym<'a>(tag: char) -> Parser<'a, char> {
176 Parser::new(move |input: &[u8], start: usize| {
177 let (ch, size) = decode(input, start)?;
178 if ch != tag {
179 return Err(Error::Mismatch {
180 message: format!("expect: {}, found: {}", tag, ch),
181 position: start,
182 });
183 }
184 let pos = start + size;
185 Ok((ch, pos))
186 })
187}
188
189pub fn seq<'a, 'b: 'a>(tag_str: &'b str) -> Parser<'a, &'a str> {
191 let tag = tag_str.as_bytes();
192 Parser::new(move |input: &'a [u8], start: usize| {
193 let mut index = 0;
194 loop {
195 let pos = start + index;
196 if index == tag.len() {
197 let result = &input[start..pos];
198 let result_str = unsafe { str::from_utf8_unchecked(result) };
200 return Ok((result_str, pos));
201 }
202 let Some(s) = input.get(pos) else {
203 return Err(Error::Incomplete);
204 };
205 if tag[index] != *s {
206 return Err(Error::Mismatch {
207 message: format!("seq {:?} at byte index: {}", tag, pos),
208 position: pos,
209 });
210 }
211 index += 1;
212 }
213 })
214}
215
216pub fn one_of<'a, S>(set: &'a S) -> Parser<'a, char>
218where
219 S: Set<char> + ?Sized,
220{
221 Parser::new(move |input: &'a [u8], start: usize| {
222 let (ch, size) = decode(input, start)?;
223 if !set.contains(&ch) {
224 return Err(Error::Mismatch {
225 message: format!("expect one of: {}, found: {}", set.to_str(), ch),
226 position: start,
227 });
228 }
229 let pos = start + size;
230 Ok((ch, pos))
231 })
232}
233
234pub fn none_of<'a, S>(set: &'a S) -> Parser<'a, char>
236where
237 S: Set<char> + ?Sized,
238{
239 Parser::new(move |input: &'a [u8], start: usize| {
240 let (ch, size) = decode(input, start)?;
241 if set.contains(&ch) {
242 return Err(Error::Mismatch {
243 message: format!("expect one of: {}, found: {}", set.to_str(), ch),
244 position: start,
245 });
246 }
247 let pos = start + size;
248 Ok((ch, pos))
249 })
250}
251
252pub fn is_a<'a, F>(predicate: F) -> Parser<'a, char>
254where
255 F: Fn(char) -> bool + 'a,
256{
257 Parser::new(move |input: &'a [u8], start: usize| {
258 let (ch, size) = decode(input, start)?;
259 if !predicate(ch) {
260 return Err(Error::Mismatch {
261 message: format!("is_a predicate failed on: {}", ch),
262 position: start,
263 });
264 }
265 let pos = start + size;
266 Ok((ch, pos))
267 })
268}
269
270pub fn not_a<'a, F>(predicate: F) -> Parser<'a, char>
272where
273 F: Fn(char) -> bool + 'a,
274{
275 Parser::new(move |input: &'a [u8], start: usize| {
276 let (ch, size) = decode(input, start)?;
277 if predicate(ch) {
278 return Err(Error::Mismatch {
279 message: format!("is_a predicate failed on: {}", ch),
280 position: start,
281 });
282 }
283 let pos = start + size;
284 Ok((ch, pos))
285 })
286}
287
288pub fn take<'a>(n: usize) -> Parser<'a, &'a str> {
290 Parser::new(move |input: &'a [u8], start: usize| {
291 let mut byte_pos = start;
292 for _ in 0..n {
293 let (ch, size) = decode_utf8(&input[start..]);
294 if ch.is_none() {
295 return no_utf8(byte_pos, size);
296 }
297 byte_pos += size;
298 }
299 let result = &input[start..byte_pos];
300 let result_str = unsafe { str::from_utf8_unchecked(result) };
302 Ok((result_str, byte_pos))
303 })
304}
305
306pub fn skip<'a>(n: usize) -> Parser<'a, ()> {
308 Parser::new(move |input: &'a [u8], start: usize| {
309 let mut byte_pos = start;
310 for _ in 0..n {
311 let (ch, size) = decode_utf8(&input[start..]);
312 if ch.is_none() {
313 return no_utf8(byte_pos, size);
314 }
315 byte_pos += size;
316 }
317 Ok(((), byte_pos))
318 })
319}
320
321pub fn take_bytes<'a>(n: usize) -> Parser<'a, &'a str> {
323 Parser::new(move |input: &'a [u8], start: usize| {
324 let mut byte_pos = start;
329 loop {
330 let (ch, size) = decode_utf8(&input[start..]);
331 if ch.is_none() {
332 return no_utf8(byte_pos, size);
333 }
334 byte_pos += size;
335 if byte_pos > n {
336 return Err(Error::Mismatch {
337 message: "range splits a UTF-8 character".to_owned(),
338 position: start,
339 });
340 }
341 if byte_pos == n {
342 let result = &input[start..byte_pos];
343 let result_str = unsafe { str::from_utf8_unchecked(result) };
345 return Ok((result_str, byte_pos));
346 }
347 }
348 })
349}
350
351pub fn skip_bytes<'a>(n: usize) -> Parser<'a, ()> {
353 Parser::new(move |input: &'a [u8], start: usize| {
354 let mut byte_pos = start;
356 loop {
357 let (ch, size) = decode_utf8(&input[start..]);
358 if ch.is_none() {
359 return no_utf8(byte_pos, size);
360 }
361 byte_pos += size;
362 if byte_pos > n {
363 return Err(Error::Mismatch {
364 message: "range splits a UTF-8 character".to_owned(),
365 position: start,
366 });
367 }
368 if byte_pos == n {
369 return Ok(((), byte_pos));
370 }
371 }
372 })
373}
374
375impl<'a, O: 'a, U: 'a, F: Fn(O) -> Parser<'a, U> + 'a> Shr<F> for Parser<'a, O> {
377 type Output = Parser<'a, U>;
378
379 fn shr(self, other: F) -> Self::Output {
380 Parser::new(move |input: &'a [u8], start: usize| {
381 (self.0.method)(input, start).and_then(|(out, pos)| (other(out).0.method)(input, pos))
382 })
383 }
384}
385
386pub fn empty<'a>() -> Parser<'a, ()> {
393 Parser(parser::empty())
394}
395
396pub fn list<'a, O, U>(item: Parser<'a, O>, separator: Parser<'a, U>) -> Parser<'a, Vec<O>>
398where
399 O: 'a,
400 U: 'a,
401{
402 Parser(parser::list(item.0, separator.0))
403}
404
405pub fn call<'a, O, F>(parser_factory: F) -> Parser<'a, O>
407where
408 O: 'a,
409 F: Fn() -> Parser<'a, O> + 'a,
410{
411 Parser(parser::call(move || parser_factory().0))
412}
413
414pub fn end<'a>() -> Parser<'a, ()> {
416 Parser(parser::end())
417}
418
// Implements a binary operator between two UTF-8 parsers. The result is
// again a UTF-8 parser. `Left` and `Right` are the operands' output types and
// may be named in the output-type argument.
macro_rules! utf_op {
    ( $op_trait:ident, $method:ident, $sym:tt, $output:ty, $summary:expr ) => {
        #[doc=$summary]
        impl<'a, Left: 'a, Right: 'a> $op_trait<Parser<'a, Right>> for Parser<'a, Left> {
            type Output = Parser<'a, $output>;

            fn $method (self, other: Parser<'a, Right>) -> Self::Output {
                let combined = self.0 $sym other.0;
                Parser(combined)
            }
        }
    };
}
433
// Implements a binary operator with a UTF-8 parser on the left and a
// byte-level parser on the right. The result is a byte-level parser.
macro_rules! utf_u8_op {
    ( $op_trait:ident, $method:ident, $sym:tt, $output:ty, $summary:expr ) => {
        #[doc=concat!($summary, " (but degrade to non-utf8 parser)")]
        impl<'a, Left: 'a, Right: 'a> $op_trait<parser::Parser<'a, u8, Right>> for Parser<'a, Left> {
            type Output = parser::Parser<'a, u8, $output>;

            fn $method (self, other: parser::Parser<'a, u8, Right>) -> Self::Output {
                let Parser(left) = self;
                left $sym other
            }
        }
    };
}
446
// Implements a binary operator with a byte-level parser on the left and a
// UTF-8 parser on the right. The result is a byte-level parser.
macro_rules! u8_utf_op {
    ( $op_trait:ident, $method:ident, $sym:tt, $output:ty, $summary:expr ) => {
        #[doc=concat!($summary, " (but degrade to non-utf8 parser)")]
        impl<'a, Left: 'a, Right: 'a> $op_trait<Parser<'a, Right>> for parser::Parser<'a, u8, Left> {
            type Output = parser::Parser<'a, u8, $output>;

            fn $method (self, other: Parser<'a, Right>) -> Self::Output {
                let Parser(right) = other;
                self $sym right
            }
        }
    };
}
459
// Generates all three operand pairings of one operator: UTF-8 with UTF-8,
// UTF-8 with byte-level, and byte-level with UTF-8.
macro_rules! all_op {
    ( $op_trait:ident, $method:ident, $sym:tt, $output:ty, $summary:expr ) => {
        utf_op!($op_trait, $method, $sym, $output, $summary);
        utf_u8_op!($op_trait, $method, $sym, $output, $summary);
        u8_utf_op!($op_trait, $method, $sym, $output, $summary);
    };
}
467
// `a + b`: the combined parser's output is the tuple `(Left, Right)`.
all_op!(Add, add, +, (Left, Right), "Sequence reserve value");

// `a - b`: the combined parser's output is the left operand's output only.
all_op!(Sub, sub, -, Left, "Sequence discard second value");

// `a * b`: the combined parser's output is the right operand's output only.
all_op!(Mul, mul, *, Right, "Sequence discard first value");
473
474impl<'a, O: 'a> BitOr for Parser<'a, O> {
476 type Output = Self;
477
478 fn bitor(self, other: Self) -> Self {
479 Self(self.0 | other.0)
480 }
481}
482
483impl<'a, O: 'a> BitOr<parser::Parser<'a, u8, O>> for Parser<'a, O> {
485 type Output = parser::Parser<'a, u8, O>;
486
487 fn bitor(self, other: parser::Parser<'a, u8, O>) -> Self::Output {
488 self.0 | other
489 }
490}
491
492impl<'a, O: 'a> BitOr<Parser<'a, O>> for parser::Parser<'a, u8, O> {
494 type Output = parser::Parser<'a, u8, O>;
495
496 fn bitor(self, other: Parser<'a, O>) -> Self::Output {
497 self | other.0
498 }
499}
500
501impl<'a, O: 'a> Neg for Parser<'a, O> {
503 type Output = Parser<'a, bool>;
504
505 fn neg(self) -> Self::Output {
506 Parser(-self.0)
507 }
508}
509
510impl<'a, O: 'a> Not for Parser<'a, O> {
512 type Output = Parser<'a, bool>;
513
514 fn not(self) -> Self::Output {
515 Parser(!self.0)
516 }
517}