pom/
parser.rs

1use super::{Error, Result};
2use crate::range::Bound::*;
3use crate::range::RangeArgument;
4use crate::set::Set;
5use std::fmt::{Debug, Display};
6use std::ops::{Add, BitOr, Mul, Neg, Not, Shr, Sub};
7
8type Parse<'a, I, O> = dyn Fn(&'a [I], usize) -> Result<(O, usize)> + 'a;
9
10/// Parser combinator.
11pub struct Parser<'a, I, O> {
12	pub method: Box<Parse<'a, I, O>>,
13}
14
15impl<'a, I, O> Parser<'a, I, O> {
16	/// Create new parser.
17	pub fn new<P>(parse: P) -> Parser<'a, I, O>
18	where
19		P: Fn(&'a [I], usize) -> Result<(O, usize)> + 'a,
20	{
21		Parser {
22			method: Box::new(parse),
23		}
24	}
25
26	/// Apply the parser to parse input.
27	pub fn parse(&self, input: &'a [I]) -> Result<O> {
28		(self.method)(input, 0).map(|(out, _)| out)
29	}
30
31	/// Parse input at specified position.
32	pub fn parse_at(&self, input: &'a [I], start: usize) -> Result<(O, usize)> {
33		(self.method)(input, start)
34	}
35
36	/// Convert parser result to desired value.
37	pub fn map<U, F>(self, f: F) -> Parser<'a, I, U>
38	where
39		F: Fn(O) -> U + 'a,
40		I: 'a,
41		O: 'a,
42		U: 'a,
43	{
44		Parser::new(move |input: &'a [I], start: usize| {
45			(self.method)(input, start).map(|(out, pos)| (f(out), pos))
46		})
47	}
48
49	/// Convert parser result to desired value, fail in case of conversion error.
50	pub fn convert<U, E, F>(self, f: F) -> Parser<'a, I, U>
51	where
52		F: Fn(O) -> ::std::result::Result<U, E> + 'a,
53		E: Debug,
54		O: 'a,
55		U: 'a,
56	{
57		Parser::new(move |input: &'a [I], start: usize| {
58			(self.method)(input, start).and_then(|(res, pos)| match f(res) {
59				Ok(out) => Ok((out, pos)),
60				Err(err) => Err(Error::Conversion {
61					message: format!("Conversion error: {:?}", err),
62					position: start,
63				}),
64			})
65		})
66	}
67
68	/// Cache parser output result to speed up backtracking.
69	pub fn cache(self) -> Parser<'a, I, O>
70	where
71		O: Clone + 'a,
72	{
73		use std::cell::RefCell;
74		use std::collections::HashMap;
75		let results = RefCell::new(HashMap::new());
76		Parser::new(move |input: &'a [I], start: usize| {
77			let key = (start, format!("{:p}", &self.method));
78			results
79				.borrow_mut()
80				.entry(key)
81				.or_insert_with(|| (self.method)(input, start))
82				.clone()
83		})
84	}
85
86	/// Get input position after matching parser.
87	pub fn pos(self) -> Parser<'a, I, usize>
88	where
89		O: 'a,
90	{
91		Parser::new(move |input: &'a [I], start: usize| {
92			(self.method)(input, start).map(|(_, pos)| (pos, pos))
93		})
94	}
95
96	/// Collect all matched input symbols.
97	pub fn collect(self) -> Parser<'a, I, &'a [I]>
98	where
99		O: 'a,
100	{
101		Parser::new(move |input: &'a [I], start: usize| {
102			(self.method)(input, start).map(|(_, end)| (&input[start..end], end))
103		})
104	}
105
106	/// Discard parser output.
107	pub fn discard(self) -> Parser<'a, I, ()>
108	where
109		O: 'a,
110	{
111		Parser::new(move |input: &'a [I], start: usize| {
112			(self.method)(input, start).map(|(_, end)| ((), end))
113		})
114	}
115
116	/// Make parser optional.
117	pub fn opt(self) -> Parser<'a, I, Option<O>>
118	where
119		O: 'a,
120	{
121		Parser::new(
122			move |input: &'a [I], start: usize| match (self.method)(input, start) {
123				Ok((out, pos)) => Ok((Some(out), pos)),
124				Err(_) => Ok((None, start)),
125			},
126		)
127	}
128
129	/// `p.repeat(5)` repeat p exactly 5 times
130	/// `p.repeat(0..)` repeat p zero or more times
131	/// `p.repeat(1..)` repeat p one or more times
132	/// `p.repeat(1..4)` match p at least 1 and at most 3 times
133	pub fn repeat<R>(self, range: R) -> Parser<'a, I, Vec<O>>
134	where
135		R: RangeArgument<usize> + Debug + 'a,
136		O: 'a,
137	{
138		Parser::new(move |input: &'a [I], start: usize| {
139			let mut items = vec![];
140			let mut pos = start;
141			loop {
142				match range.end() {
143					Included(&max_count) => {
144						if items.len() >= max_count {
145							break;
146						}
147					}
148					Excluded(&max_count) => {
149						if items.len() + 1 >= max_count {
150							break;
151						}
152					}
153					Unbounded => (),
154				}
155
156				if let Ok((item, item_pos)) = (self.method)(input, pos) {
157					items.push(item);
158					pos = item_pos;
159				} else {
160					break;
161				}
162			}
163			if let Included(&min_count) = range.start() {
164				if items.len() < min_count {
165					return Err(Error::Mismatch {
166						message: format!(
167							"expect repeat at least {} times, found {} times",
168							min_count,
169							items.len()
170						),
171						position: start,
172					});
173				}
174			}
175			Ok((items, pos))
176		})
177	}
178
179	/// Give parser a name to identify parsing errors.
180	pub fn name(self, name: &'a str) -> Parser<'a, I, O>
181	where
182		O: 'a,
183	{
184		Parser::new(
185			move |input: &'a [I], start: usize| match (self.method)(input, start) {
186				res @ Ok(_) => res,
187				Err(err) => match err {
188					Error::Custom { .. } => Err(err),
189					_ => Err(Error::Custom {
190						message: format!("failed to parse {}", name),
191						position: start,
192						inner: Some(Box::new(err)),
193					}),
194				},
195			},
196		)
197	}
198
199	/// Mark parser as expected, abort early when failed in ordered choice.
200	pub fn expect(self, name: &'a str) -> Parser<'a, I, O>
201	where
202		O: 'a,
203	{
204		Parser::new(
205			move |input: &'a [I], start: usize| match (self.method)(input, start) {
206				res @ Ok(_) => res,
207				Err(err) => Err(Error::Expect {
208					message: format!("Expect {}", name),
209					position: start,
210					inner: Box::new(err),
211				}),
212			},
213		)
214	}
215}
216
217/// Always succeeds, consume no input.
218pub fn empty<'a, I>() -> Parser<'a, I, ()> {
219	Parser::new(|_: &[I], start: usize| Ok(((), start)))
220}
221
222/// Success when current input symbol equals `t`.
223pub fn sym<'a, I>(t: I) -> Parser<'a, I, I>
224where
225	I: Clone + PartialEq + Display,
226{
227	Parser::new(move |input: &'a [I], start: usize| {
228		if let Some(s) = input.get(start) {
229			if t == *s {
230				Ok((s.clone(), start + 1))
231			} else {
232				Err(Error::Mismatch {
233					message: format!("expect: {}, found: {}", t, s),
234					position: start,
235				})
236			}
237		} else {
238			Err(Error::Incomplete)
239		}
240	})
241}
242
243/// Success when sequence of symbols matches current input.
244pub fn seq<'a, 'b: 'a, I>(tag: &'b [I]) -> Parser<'a, I, &'a [I]>
245where
246	I: PartialEq + Debug,
247{
248	Parser::new(move |input: &'a [I], start: usize| {
249		let mut index = 0;
250		loop {
251			let pos = start + index;
252			if index == tag.len() {
253				return Ok((tag, pos));
254			}
255			if let Some(s) = input.get(pos) {
256				if tag[index] != *s {
257					return Err(Error::Mismatch {
258						message: format!("seq {:?} expect: {:?}, found: {:?}", tag, tag[index], s),
259						position: pos,
260					});
261				}
262			} else {
263				return Err(Error::Incomplete);
264			}
265			index += 1;
266		}
267	})
268}
269
270/// Success when tag matches current input.
271pub fn tag<'a, 'b: 'a>(tag: &'b str) -> Parser<'a, char, &'a str> {
272	Parser::new(move |input: &'a [char], start: usize| {
273		let mut pos = start;
274		for c in tag.chars() {
275			if let Some(&s) = input.get(pos) {
276				if c != s {
277					return Err(Error::Mismatch {
278						message: format!("tag {:?} expect: {:?}, found: {}", tag, c, s),
279						position: pos,
280					});
281				}
282			} else {
283				return Err(Error::Incomplete);
284			}
285			pos += 1;
286		}
287		Ok((tag, pos))
288	})
289}
290
291/// Parse separated list.
292pub fn list<'a, I, O, U>(
293	parser: Parser<'a, I, O>,
294	separator: Parser<'a, I, U>,
295) -> Parser<'a, I, Vec<O>>
296where
297	O: 'a,
298	U: 'a,
299{
300	Parser::new(move |input: &'a [I], start: usize| {
301		let mut items = vec![];
302		let mut pos = start;
303		if let Ok((first_item, first_pos)) = (parser.method)(input, pos) {
304			items.push(first_item);
305			pos = first_pos;
306			while let Ok((_, sep_pos)) = (separator.method)(input, pos) {
307				match (parser.method)(input, sep_pos) {
308					Ok((more_item, more_pos)) => {
309						items.push(more_item);
310						pos = more_pos;
311					}
312					Err(_) => break,
313				}
314			}
315		}
316		Ok((items, pos))
317	})
318}
319
320/// Success when current input symbol is one of the set.
321pub fn one_of<'a, I, S>(set: &'a S) -> Parser<'a, I, I>
322where
323	I: Clone + PartialEq + Display + Debug,
324	S: Set<I> + ?Sized,
325{
326	Parser::new(move |input: &'a [I], start: usize| {
327		if let Some(s) = input.get(start) {
328			if set.contains(s) {
329				Ok((s.clone(), start + 1))
330			} else {
331				Err(Error::Mismatch {
332					message: format!("expect one of: {}, found: {}", set.to_str(), s),
333					position: start,
334				})
335			}
336		} else {
337			Err(Error::Incomplete)
338		}
339	})
340}
341
342/// Success when current input symbol is none of the set.
343pub fn none_of<'a, I, S>(set: &'static S) -> Parser<'a, I, I>
344where
345	I: Clone + PartialEq + Display + Debug,
346	S: Set<I> + ?Sized,
347{
348	Parser::new(move |input: &'a [I], start: usize| {
349		if let Some(s) = input.get(start) {
350			if set.contains(s) {
351				Err(Error::Mismatch {
352					message: format!("expect none of: {}, found: {}", set.to_str(), s),
353					position: start,
354				})
355			} else {
356				Ok((s.clone(), start + 1))
357			}
358		} else {
359			Err(Error::Incomplete)
360		}
361	})
362}
363
364/// Success when predicate returns true on current input symbol.
365pub fn is_a<'a, I, F>(predicate: F) -> Parser<'a, I, I>
366where
367	I: Clone + PartialEq + Display + Debug,
368	F: Fn(I) -> bool + 'a,
369{
370	Parser::new(move |input: &'a [I], start: usize| {
371		if let Some(s) = input.get(start) {
372			if predicate(s.clone()) {
373				Ok((s.clone(), start + 1))
374			} else {
375				Err(Error::Mismatch {
376					message: format!("is_a predicate failed on: {}", s),
377					position: start,
378				})
379			}
380		} else {
381			Err(Error::Incomplete)
382		}
383	})
384}
385
386/// Success when predicate returns false on current input symbol.
387pub fn not_a<'a, I, F>(predicate: F) -> Parser<'a, I, I>
388where
389	I: Clone + PartialEq + Display + Debug,
390	F: Fn(I) -> bool + 'a,
391{
392	Parser::new(move |input: &'a [I], start: usize| {
393		if let Some(s) = input.get(start) {
394			if predicate(s.clone()) {
395				Err(Error::Mismatch {
396					message: format!("not_a predicate failed on: {}", s),
397					position: start,
398				})
399			} else {
400				Ok((s.clone(), start + 1))
401			}
402		} else {
403			Err(Error::Incomplete)
404		}
405	})
406}
407
408/// Read n symbols.
409pub fn take<'a, I>(n: usize) -> Parser<'a, I, &'a [I]>
410{
411	Parser::new(move |input: &'a [I], start: usize| {
412		let pos = start + n;
413		if input.len() >= pos {
414			Ok((&input[start..pos], pos))
415		} else {
416			Err(Error::Incomplete)
417		}
418	})
419}
420
421/// Skip n symbols.
422pub fn skip<'a, I>(n: usize) -> Parser<'a, I, ()>
423{
424	Parser::new(move |input: &'a [I], start: usize| {
425		let pos = start + n;
426		if input.len() >= pos {
427			Ok(((), pos))
428		} else {
429			Err(Error::Incomplete)
430		}
431	})
432}
433
434/// Call a parser factory, can be used to create recursive parsers.
435pub fn call<'a, I, O, F>(parser_factory: F) -> Parser<'a, I, O>
436where
437	O: 'a,
438	F: Fn() -> Parser<'a, I, O> + 'a,
439{
440	Parser::new(move |input: &'a [I], start: usize| {
441		let parser = parser_factory();
442		(parser.method)(input, start)
443	})
444}
445
446/// Success when end of input is reached.
447pub fn end<'a, I>() -> Parser<'a, I, ()>
448where
449	I: Display,
450{
451	Parser::new(|input: &'a [I], start: usize| {
452		if let Some(s) = input.get(start) {
453			Err(Error::Mismatch {
454				message: format!("expect end of input, found: {}", s),
455				position: start,
456			})
457		} else {
458			Ok(((), start))
459		}
460	})
461}
462
463/// Sequence reserve value
464impl<'a, I, O: 'a, U: 'a> Add<Parser<'a, I, U>> for Parser<'a, I, O> {
465	type Output = Parser<'a, I, (O, U)>;
466
467	fn add(self, other: Parser<'a, I, U>) -> Self::Output {
468		Parser::new(move |input: &'a [I], start: usize| {
469			(self.method)(input, start).and_then(|(out1, pos1)| {
470				(other.method)(input, pos1).map(|(out2, pos2)| ((out1, out2), pos2))
471			})
472		})
473	}
474}
475
476/// Sequence discard second value
477impl<'a, I, O: 'a, U: 'a> Sub<Parser<'a, I, U>> for Parser<'a, I, O> {
478	type Output = Parser<'a, I, O>;
479
480	fn sub(self, other: Parser<'a, I, U>) -> Self::Output {
481		Parser::new(move |input: &'a [I], start: usize| {
482			(self.method)(input, start)
483				.and_then(|(out1, pos1)| (other.method)(input, pos1).map(|(_, pos2)| (out1, pos2)))
484		})
485	}
486}
487
488/// Sequence discard first value
489impl<'a, I: 'a, O: 'a, U: 'a> Mul<Parser<'a, I, U>> for Parser<'a, I, O> {
490	type Output = Parser<'a, I, U>;
491
492	fn mul(self, other: Parser<'a, I, U>) -> Self::Output {
493		Parser::new(move |input: &'a [I], start: usize| {
494			(self.method)(input, start)
495				.and_then(|(_, pos1)| (other.method)(input, pos1).map(|(out2, pos2)| (out2, pos2)))
496		})
497	}
498}
499
500/// Chain two parsers where the second parser depends on the first's result.
501impl<'a, I, O: 'a, U: 'a, F: Fn(O) -> Parser<'a, I, U> + 'a> Shr<F> for Parser<'a, I, O> {
502	type Output = Parser<'a, I, U>;
503
504	fn shr(self, other: F) -> Self::Output {
505		Parser::new(move |input: &'a [I], start: usize| {
506			(self.method)(input, start).and_then(|(out, pos)| (other(out).method)(input, pos))
507		})
508	}
509}
510
511/// Ordered choice
512impl<'a, I, O: 'a> BitOr for Parser<'a, I, O> {
513	type Output = Parser<'a, I, O>;
514
515	fn bitor(self, other: Parser<'a, I, O>) -> Self::Output {
516		Parser::new(
517			move |input: &'a [I], start: usize| match (self.method)(input, start) {
518				Ok(out) => Ok(out),
519				Err(err) => match err {
520					Error::Expect { .. } => Err(err),
521					_ => (other.method)(input, start),
522				},
523			},
524		)
525	}
526}
527
528/// And predicate
529impl<'a, I, O: 'a> Neg for Parser<'a, I, O> {
530	type Output = Parser<'a, I, bool>;
531
532	fn neg(self) -> Self::Output {
533		Parser::new(move |input: &'a [I], start: usize| {
534			(self.method)(input, start).map(|_| (true, start))
535		})
536	}
537}
538
539/// Not predicate
540impl<'a, I, O: 'a> Not for Parser<'a, I, O> {
541	type Output = Parser<'a, I, bool>;
542
543	fn not(self) -> Self::Output {
544		Parser::new(
545			move |input: &'a [I], start: usize| match (self.method)(input, start) {
546				Ok(_) => Err(Error::Mismatch {
547					message: "not predicate failed".to_string(),
548					position: start,
549				}),
550				Err(_) => Ok((true, start)),
551			},
552		)
553	}
554}
555
556#[cfg(test)]
557mod tests {
558	use crate::parser::*;
559	use crate::Error;
560
561	#[test]
562	fn byte_works() {
563		let input = b"abcde";
564		let parser = sym(b'a') + one_of(b"ab") - sym(b'C');
565		let output = parser.parse(input);
566		assert_eq!(
567			output,
568			Err(Error::Mismatch {
569				message: "expect: 67, found: 99".to_string(),
570				position: 2
571			})
572		);
573
574		let parser = sym(b'a') * none_of(b"AB") - sym(b'c') + seq(b"de");
575		let output = parser.parse(input);
576		assert_eq!(output, Ok((b'b', &b"de"[..])));
577		assert_eq!(parser.pos().parse(input), Ok(5));
578
579		let parser = sym(b'e') | sym(b'd').expect("d") | empty().map(|_| b'0');
580		let output = parser.parse(input);
581		assert_eq!(
582			output,
583			Err(Error::Expect {
584				message: "Expect d".to_owned(),
585				position: 0,
586				inner: Box::new(Error::Mismatch {
587					message: "expect: 100, found: 97".to_string(),
588					position: 0
589				})
590			})
591		);
592	}
593
594	#[test]
595	fn char_works() {
596		let input = "abcd".chars().collect::<Vec<char>>();
597		let parser = tag("ab") + sym('c') | sym('d').map(|_| ("", '0'));
598		let output = parser.parse(&input);
599		assert_eq!(output, Ok(("ab", 'c')));
600	}
601
602	#[test]
603	fn recursive_parser() {
604		#[derive(Debug, PartialEq)]
605		enum Expr {
606			Empty,
607			Group(Box<Expr>),
608		}
609		fn expr() -> Parser<'static, u8, Expr> {
610			(sym(b'(') + call(expr) - sym(b')')).map(|(_, e)| Expr::Group(Box::new(e)))
611				| empty().map(|_| Expr::Empty)
612		}
613		let input = b"(())";
614		let parser = expr();
615		let output = parser.parse(input);
616		assert_eq!(
617			output,
618			Ok(Expr::Group(Box::new(Expr::Group(Box::new(Expr::Empty)))))
619		);
620	}
621
622	#[test]
623	fn chain_parser() {
624		let input = b"5oooooooo";
625		{
626			let parser = one_of(b"0123456789").map(|c| c - b'0')
627				>> |n| take(n as usize) + sym(b'o').repeat(0..);
628			assert_eq!(parser.parse(input), Ok((&b"ooooo"[..], vec![b'o'; 3])));
629		}
630		{
631			let parser =
632				skip(1) * take(3) >> |v: &'static [u8]| take(v.len() + 2).map(move |u| (u, v));
633			assert_eq!(parser.parse(input), Ok((&b"ooooo"[..], &b"ooo"[..])));
634		}
635		{
636			let parser = Parser::new(move |input, start| {
637				(skip(1) * take(3))
638					.parse_at(input, start)
639					.and_then(|(v, pos)| {
640						take(v.len() + 2)
641							.parse_at(input, pos)
642							.map(|(u, end)| ((u, v), end))
643					})
644			});
645			assert_eq!(parser.parse(input), Ok((&b"ooooo"[..], &b"ooo"[..])));
646		}
647	}
648
649	#[test]
650	fn repeat_at_least() {
651		let input = b"xxxooo";
652
653		{
654			let parser = sym(b'x').repeat(1..2);
655			let output = parser.parse(input);
656			assert_eq!(output, Ok(vec![b'x'; 1]))
657		}
658
659		{
660			let parser = sym(b'x').repeat(1..);
661			let output = parser.parse(input);
662			assert_eq!(output, Ok(vec![b'x'; 3]))
663		}
664
665		{
666			let parser = sym(b'x').repeat(0..);
667			let output = parser.parse(input);
668			assert_eq!(output, Ok(vec![b'x'; 3]))
669		}
670
671		{
672			let parser = sym(b'y').repeat(0..);
673			let output = parser.parse(input);
674			assert_eq!(output, Ok(vec![]))
675		}
676
677		{
678			let parser = sym(b'y').repeat(1..);
679			let output = parser.parse(input);
680			assert!(output.is_err());
681		}
682
683		{
684			let parser = sym(b'x').repeat(10..);
685			let output = parser.parse(input);
686			assert!(output.is_err());
687		}
688	}
689
690	#[test]
691	fn repeat_up_to() {
692		let input = b"xxxooo";
693
694		{
695			let parser = sym(b'x').repeat(..2);
696			let output = parser.parse(input);
697			assert_eq!(output, Ok(vec![b'x'; 1]))
698		}
699
700		{
701			let parser = sym(b'x').repeat(..4);
702			let output = parser.parse(input);
703			assert_eq!(output, Ok(vec![b'x'; 3]))
704		}
705
706		{
707			let parser = sym(b'x').repeat(..);
708			let output = parser.parse(input);
709			assert_eq!(output, Ok(vec![b'x'; 3]))
710		}
711
712		{
713			let parser = sym(b'x').repeat(..0);
714			let output = parser.parse(input);
715			assert_eq!(output, Ok(vec![]))
716		}
717
718		{
719			let parser = sym(b'x').repeat(..10);
720			let output = parser.parse(input);
721			assert_eq!(output, Ok(vec![b'x'; 3]))
722		}
723	}
724
725	#[test]
726	fn repeat_exactly() {
727		let input = b"xxxooo";
728
729		{
730			let parser = sym(b'x').repeat(0);
731			let output = parser.parse(input);
732			assert_eq!(output, Ok(vec![]))
733		}
734
735		{
736			let parser = sym(b'x').repeat(1);
737			let output = parser.parse(input);
738			assert_eq!(output, Ok(vec![b'x'; 1]))
739		}
740
741		{
742			let parser = sym(b'x').repeat(2);
743			let output = parser.parse(input);
744			assert_eq!(output, Ok(vec![b'x'; 2]))
745		}
746
747		{
748			let parser = sym(b'x').repeat(3);
749			let output = parser.parse(input);
750			assert_eq!(output, Ok(vec![b'x'; 3]))
751		}
752
753		{
754			let parser = sym(b'x').repeat(4);
755			let output = parser.parse(input);
756			assert!(output.is_err())
757		}
758	}
759}