1#![allow(clippy::type_complexity)]
2
3pub use kalosm_parse_macro::*;
4mod integer;
5use std::{
6 any::Any,
7 borrow::Cow,
8 error::Error,
9 fmt::{Debug, Display},
10 ops::Deref,
11 sync::{Arc, Mutex, OnceLock},
12};
13
14pub use integer::*;
15mod float;
16pub use float::*;
17mod literal;
18pub use literal::*;
19mod or;
20pub use or::*;
21mod then;
22pub use then::*;
23mod string;
24pub use string::*;
25mod repeat;
26pub use repeat::*;
27mod separated;
28pub use separated::*;
29mod parse;
30pub use parse::*;
31mod word;
32pub use word::*;
33mod sentence;
34pub use sentence::*;
35mod stop_on;
36pub use stop_on::*;
37mod map;
38pub use map::*;
39mod regex;
40pub use regex::*;
41mod arc_linked_list;
42pub(crate) use arc_linked_list::*;
43mod schema;
44pub use schema::*;
45mod index;
46pub use index::*;
47mod one_line;
48pub use one_line::*;
49
/// A type-erased error produced while parsing.
///
/// The underlying error lives behind an [`Arc`], so cloning a `ParserError` shares the same
/// error value instead of copying it.
#[derive(Clone, Debug)]
pub struct ParserError(Arc<dyn Error + Send + Sync + 'static>);
53
/// Returns early from the enclosing function with an `Err` holding a [`ParserError`].
///
/// Three forms are accepted and tried in this order:
/// - `bail!("message")`: a literal passed to `ParserError::msg` as-is (it is *not* treated as a
///   format string).
/// - `bail!(error)`: any expression that converts into a `ParserError` through `From`.
/// - `bail!("format {}", args...)`: a format string followed by its arguments.
#[macro_export]
macro_rules! bail {
    ($msg:literal $(,)?) => {
        return $crate::ParseResult::Err($crate::ParserError::msg($msg))
    };
    ($err:expr $(,)?) => {
        return $crate::ParseResult::Err($crate::ParserError::from($err))
    };
    ($fmt:expr, $($arg:tt)*) => {
        // Macro names are resolved where the macro is invoked, so use an absolute path: a
        // caller that shadows `format!` must not change what this expands to.
        return $crate::ParseResult::Err($crate::ParserError::msg(::std::format!($fmt, $($arg)*)))
    };
}
67
68impl ParserError {
69 pub fn msg(msg: impl Display + Debug + Send + Sync + 'static) -> Self {
71 #[derive(Debug)]
72 struct CustomError(String);
73 impl std::error::Error for CustomError {}
74 impl Display for CustomError {
75 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
76 write!(f, "{}", self.0)
77 }
78 }
79 Self(Arc::new(CustomError(msg.to_string())))
80 }
81}
82
83impl PartialEq for ParserError {
84 fn eq(&self, other: &Self) -> bool {
85 Arc::ptr_eq(&self.0, &other.0)
86 }
87}
88
89impl Eq for ParserError {}
90
91impl AsRef<dyn Error> for ParserError {
92 fn as_ref(&self) -> &(dyn Error + 'static) {
93 self.0.as_ref()
94 }
95}
96
97impl AsRef<dyn std::error::Error + Send + Sync + 'static> for ParserError {
98 fn as_ref(&self) -> &(dyn std::error::Error + Send + Sync + 'static) {
99 self.0.as_ref()
100 }
101}
102
103impl Deref for ParserError {
104 type Target = (dyn Error + Send + Sync + 'static);
105
106 fn deref(&self) -> &(dyn Error + Send + Sync + 'static) {
107 self.0.as_ref()
108 }
109}
110
111impl<E> From<E> for ParserError
112where
113 E: std::error::Error + Send + Sync + 'static,
114{
115 fn from(value: E) -> Self {
116 Self(Arc::new(value))
117 }
118}
119
120pub type ParseResult<T> = std::result::Result<T, ParserError>;
122
123pub trait SendCreateParserState:
125 Send + Sync + CreateParserState<PartialState: Send + Sync, Output: Send + Sync>
126{
127}
128
129impl<P: CreateParserState<PartialState: Send + Sync, Output: Send + Sync> + Send + Sync>
130 SendCreateParserState for P
131{
132}
133
134pub trait CreateParserState: Parser {
136 fn create_parser_state(&self) -> <Self as Parser>::PartialState;
138}
139
140impl<P: ?Sized + CreateParserState> CreateParserState for &P {
141 fn create_parser_state(&self) -> <Self as Parser>::PartialState {
142 (*self).create_parser_state()
143 }
144}
145
146impl<P: ?Sized + CreateParserState> CreateParserState for Box<P> {
147 fn create_parser_state(&self) -> <Self as Parser>::PartialState {
148 (**self).create_parser_state()
149 }
150}
151
152impl<P: ?Sized + CreateParserState> CreateParserState for Arc<P> {
153 fn create_parser_state(&self) -> <Self as Parser>::PartialState {
154 (**self).create_parser_state()
155 }
156}
157
158impl<O: Clone> CreateParserState for ArcParser<O> {
159 fn create_parser_state(&self) -> <Self as Parser>::PartialState {
160 self.0.create_parser_state()
161 }
162}
163
164pub trait Parser {
166 type Output: Clone;
168 type PartialState: Clone;
170
171 fn parse<'a>(
173 &self,
174 state: &Self::PartialState,
175 input: &'a [u8],
176 ) -> ParseResult<ParseStatus<'a, Self::PartialState, Self::Output>>;
177}
178
179impl Parser for () {
180 type Output = ();
181 type PartialState = ();
182
183 fn parse<'a>(
184 &self,
185 _state: &Self::PartialState,
186 input: &'a [u8],
187 ) -> ParseResult<ParseStatus<'a, Self::PartialState, Self::Output>> {
188 Ok(ParseStatus::Finished {
189 result: (),
190 remaining: input,
191 })
192 }
193}
194
195impl<P: ?Sized + Parser> Parser for &P {
196 type Output = P::Output;
197 type PartialState = P::PartialState;
198
199 fn parse<'a>(
200 &self,
201 state: &Self::PartialState,
202 input: &'a [u8],
203 ) -> ParseResult<ParseStatus<'a, Self::PartialState, Self::Output>> {
204 (*self).parse(state, input)
205 }
206}
207
208impl<P: ?Sized + Parser> Parser for Box<P> {
209 type Output = P::Output;
210 type PartialState = P::PartialState;
211
212 fn parse<'a>(
213 &self,
214 state: &Self::PartialState,
215 input: &'a [u8],
216 ) -> ParseResult<ParseStatus<'a, Self::PartialState, Self::Output>> {
217 let _self: &P = self;
218 _self.parse(state, input)
219 }
220}
221
222impl<P: ?Sized + Parser> Parser for Arc<P> {
223 type Output = P::Output;
224 type PartialState = P::PartialState;
225
226 fn parse<'a>(
227 &self,
228 state: &Self::PartialState,
229 input: &'a [u8],
230 ) -> ParseResult<ParseStatus<'a, Self::PartialState, Self::Output>> {
231 let _self: &P = self;
232 _self.parse(state, input)
233 }
234}
235
236trait AnyCreateParserState:
237 Parser<PartialState = Arc<dyn Any + Send + Sync>> + CreateParserState + Send + Sync
238{
239}
240
241impl<P: Parser<PartialState = Arc<dyn Any + Send + Sync>> + CreateParserState + Send + Sync>
242 AnyCreateParserState for P
243{
244}
245
246pub struct ArcParser<O = ()>(Arc<dyn AnyCreateParserState<Output = O> + Send + Sync>);
248
249impl<O> Clone for ArcParser<O> {
250 fn clone(&self) -> Self {
251 Self(self.0.clone())
252 }
253}
254
255impl<O> ArcParser<O> {
256 fn new<P>(parser: P) -> Self
257 where
258 P: Parser<Output = O, PartialState = Arc<dyn Any + Send + Sync>>
259 + CreateParserState
260 + Send
261 + Sync
262 + 'static,
263 {
264 ArcParser(Arc::new(parser))
265 }
266}
267
268impl<O: Clone> Parser for ArcParser<O> {
269 type Output = O;
270 type PartialState = Arc<dyn Any + Send + Sync>;
271
272 fn parse<'a>(
273 &self,
274 state: &Self::PartialState,
275 input: &'a [u8],
276 ) -> ParseResult<ParseStatus<'a, Self::PartialState, Self::Output>> {
277 let _self: &dyn Parser<Output = O, PartialState = Arc<dyn Any + Send + Sync>> = &self.0;
278 _self.parse(state, input)
279 }
280}
281
282struct AnyParser<P>(P);
284
285impl<P> Parser for AnyParser<P>
286where
287 P: Parser,
288 P::PartialState: Send + Sync + 'static,
289{
290 type Output = P::Output;
291 type PartialState = Arc<dyn Any + Sync + Send>;
292
293 fn parse<'a>(
294 &self,
295 state: &Self::PartialState,
296 input: &'a [u8],
297 ) -> ParseResult<ParseStatus<'a, Self::PartialState, Self::Output>> {
298 let state = state.downcast_ref::<P::PartialState>().ok_or_else(|| {
299 struct StateIsNotOfTheCorrectType;
300 impl std::fmt::Display for StateIsNotOfTheCorrectType {
301 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
302 write!(f, "State is not of the correct type")
303 }
304 }
305 impl std::fmt::Debug for StateIsNotOfTheCorrectType {
306 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
307 write!(f, "State is not of the correct type")
308 }
309 }
310 impl Error for StateIsNotOfTheCorrectType {}
311 StateIsNotOfTheCorrectType
312 })?;
313 self.0
314 .parse(state, input)
315 .map(|result| result.map_state(|state| Arc::new(state) as Arc<dyn Any + Sync + Send>))
316 }
317}
318
319impl<P: CreateParserState> CreateParserState for AnyParser<P>
320where
321 P: Parser,
322 P::Output: Send + Sync + 'static,
323 P::PartialState: Send + Sync + 'static,
324{
325 fn create_parser_state(&self) -> <Self as Parser>::PartialState {
326 Arc::new(self.0.create_parser_state())
327 }
328}
329
330pub trait ParserExt: Parser {
332 fn otherwise<V: Parser>(self, other: V) -> ChoiceParser<Self, V>
334 where
335 Self: Sized,
336 {
337 ChoiceParser {
338 parser1: self,
339 parser2: other,
340 }
341 }
342
343 fn or<V: Parser<Output = Self::Output>>(
345 self,
346 other: V,
347 ) -> MapOutputParser<ChoiceParser<Self, V>, Self::Output>
348 where
349 Self: Sized,
350 {
351 self.otherwise(other).map_output(|either| match either {
352 Either::Left(left) => left,
353 Either::Right(right) => right,
354 })
355 }
356
357 fn then<V: Parser>(self, other: V) -> SequenceParser<Self, V>
359 where
360 Self: Sized,
361 {
362 SequenceParser::new(self, other)
363 }
364
365 fn then_lazy<V, F>(self, other: F) -> ThenLazy<Self, F>
367 where
368 Self: Sized,
369 V: CreateParserState,
370 F: Fn(&Self::Output) -> V,
371 {
372 ThenLazy::new(self, other)
373 }
374
375 fn ignore_output_then<V: CreateParserState>(
377 self,
378 other: V,
379 ) -> MapOutputParser<SequenceParser<Self, V>, <V as Parser>::Output>
380 where
381 Self: Sized,
382 {
383 SequenceParser::new(self, other).map_output(|(_, second)| second)
384 }
385
386 fn then_ignore_output<V: CreateParserState>(
388 self,
389 other: V,
390 ) -> MapOutputParser<SequenceParser<Self, V>, <Self as Parser>::Output>
391 where
392 Self: Sized,
393 {
394 SequenceParser::new(self, other).map_output(|(first, _)| first)
395 }
396
397 fn then_literal(
399 self,
400 literal: impl Into<Cow<'static, str>>,
401 ) -> MapOutputParser<SequenceParser<Self, LiteralParser>, <Self as Parser>::Output>
402 where
403 Self: Sized,
404 {
405 self.then_ignore_output(LiteralParser::new(literal))
406 }
407
408 fn repeat(self, length_range: std::ops::RangeInclusive<usize>) -> RepeatParser<Self>
410 where
411 Self: Sized,
412 {
413 RepeatParser::new(self, length_range)
414 }
415
416 fn map_output<F, O>(self, f: F) -> MapOutputParser<Self, O, F>
418 where
419 Self: Sized,
420 F: Fn(Self::Output) -> O,
421 {
422 MapOutputParser {
423 parser: self,
424 map: f,
425 _output: std::marker::PhantomData,
426 }
427 }
428
429 fn boxed(self) -> ArcParser<Self::Output>
431 where
432 Self: CreateParserState + Sized + Send + Sync + 'static,
433 Self::Output: Send + Sync + 'static,
434 Self::PartialState: Send + Sync + 'static,
435 {
436 ArcParser::new(AnyParser(self))
437 }
438
439 fn with_initial_state<F: Fn() -> Self::PartialState + Clone>(
441 self,
442 initial_state: F,
443 ) -> WithInitialState<Self, F>
444 where
445 Self: Sized,
446 {
447 WithInitialState::new(self, initial_state)
448 }
449}
450
451impl<P: Parser> ParserExt for P {}
452
453pub struct WithInitialState<P, F> {
455 parser: P,
456 initial_state: F,
457}
458
459impl<P: Parser, F: Fn() -> P::PartialState + Clone> WithInitialState<P, F> {
460 pub fn new(parser: P, initial_state: F) -> Self {
462 Self {
463 parser,
464 initial_state,
465 }
466 }
467}
468
469impl<P: Parser, F: Fn() -> P::PartialState + Clone> CreateParserState for WithInitialState<P, F> {
470 fn create_parser_state(&self) -> <Self as Parser>::PartialState {
471 (self.initial_state)()
472 }
473}
474
475impl<P: Parser, F: Fn() -> P::PartialState + Clone> Parser for WithInitialState<P, F> {
476 type Output = P::Output;
477 type PartialState = P::PartialState;
478
479 fn parse<'a>(
480 &self,
481 state: &Self::PartialState,
482 input: &'a [u8],
483 ) -> ParseResult<ParseStatus<'a, Self::PartialState, Self::Output>> {
484 self.parser.parse(state, input)
485 }
486}
487
488pub struct LazyParser<P, F> {
490 parser: Arc<OnceLock<P>>,
491 parser_fn: Arc<Mutex<Option<F>>>,
492}
493
494impl<P: Parser, F: FnOnce() -> P> LazyParser<P, F> {
495 pub fn new(parser_fn: F) -> Self {
497 Self {
498 parser: Arc::new(OnceLock::new()),
499 parser_fn: Arc::new(Mutex::new(Some(parser_fn))),
500 }
501 }
502
503 fn get_parser(&self) -> &P {
504 self.parser
505 .get_or_init(|| (self.parser_fn.lock().unwrap().take().unwrap())())
506 }
507}
508
509impl<P: CreateParserState, F: FnOnce() -> P> CreateParserState for LazyParser<P, F> {
510 fn create_parser_state(&self) -> <Self as Parser>::PartialState {
511 self.get_parser().create_parser_state()
512 }
513}
514
515impl<P: CreateParserState, F: FnOnce() -> P> From<F> for LazyParser<P, F> {
516 fn from(parser_fn: F) -> Self {
517 Self::new(parser_fn)
518 }
519}
520
521impl<P: Parser, F: FnOnce() -> P> Parser for LazyParser<P, F> {
522 type Output = P::Output;
523 type PartialState = P::PartialState;
524
525 fn parse<'a>(
526 &self,
527 state: &Self::PartialState,
528 input: &'a [u8],
529 ) -> ParseResult<ParseStatus<'a, Self::PartialState, Self::Output>> {
530 self.get_parser().parse(state, input)
531 }
532}
533
/// An owned counterpart of [`ParseStatus`]: the leftover input is stored in a `Vec<u8>`
/// instead of being borrowed from the original input.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum OwnedParseResult<P, R> {
    /// The parser has not finished yet.
    Incomplete {
        /// The updated partial state.
        new_state: P,
        /// Text reported alongside the incomplete parse (presumably the input the parser
        /// requires next; confirm against the individual parsers).
        required_next: Cow<'static, str>,
    },
    /// The parser produced its result.
    Finished {
        /// The parsed value.
        result: R,
        /// Input bytes left over after the parse finished.
        remaining: Vec<u8>,
    },
}
552
553impl<P, R> From<ParseStatus<'_, P, R>> for OwnedParseResult<P, R> {
554 fn from(result: ParseStatus<P, R>) -> Self {
555 match result {
556 ParseStatus::Incomplete {
557 new_state,
558 required_next,
559 } => OwnedParseResult::Incomplete {
560 new_state,
561 required_next,
562 },
563 ParseStatus::Finished { result, remaining } => OwnedParseResult::Finished {
564 result,
565 remaining: remaining.to_vec(),
566 },
567 }
568 }
569}
570
/// The outcome of one call to a parser: either it needs more input or it has finished.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ParseStatus<'a, P, R> {
    /// The parser has not finished yet.
    Incomplete {
        /// The updated partial state.
        new_state: P,
        /// Text reported alongside the incomplete parse (presumably the input the parser
        /// requires next; confirm against the individual parsers).
        required_next: Cow<'static, str>,
    },
    /// The parser produced its result.
    Finished {
        /// The parsed value.
        result: R,
        /// Input bytes left over after the parse finished.
        remaining: &'a [u8],
    },
}

impl<'a, P, R> ParseStatus<'a, P, R> {
    /// Drops the borrowed remainder, replacing it with an empty slice so the status no longer
    /// borrows from the input.
    pub fn without_remaining(self) -> ParseStatus<'static, P, R> {
        match self {
            Self::Incomplete {
                new_state,
                required_next,
            } => ParseStatus::Incomplete {
                new_state,
                required_next,
            },
            Self::Finished {
                result,
                remaining: _,
            } => ParseStatus::Finished {
                result,
                remaining: &[],
            },
        }
    }

    /// Returns the finished result.
    ///
    /// # Panics
    ///
    /// Panics if the status is `Incomplete`.
    pub fn unwrap_finished(self) -> R {
        if let Self::Finished { result, .. } = self {
            return result;
        }
        panic!("called `ParseStatus::unwrap_finished()` on an `Incomplete` value")
    }

    /// Returns the new state and the `required_next` text of an incomplete parse.
    ///
    /// # Panics
    ///
    /// Panics if the status is `Finished`.
    pub fn unwrap_incomplete(self) -> (P, Cow<'static, str>) {
        if let Self::Incomplete {
            new_state,
            required_next,
        } = self
        {
            return (new_state, required_next);
        }
        panic!("called `ParseStatus::unwrap_incomplete()` on a `Finished` value")
    }

    /// Applies `f` to the finished result, leaving an `Incomplete` status untouched.
    pub fn map<F, O>(self, f: F) -> ParseStatus<'a, P, O>
    where
        F: FnOnce(R) -> O,
    {
        match self {
            Self::Incomplete {
                new_state,
                required_next,
            } => ParseStatus::Incomplete {
                new_state,
                required_next,
            },
            Self::Finished { result, remaining } => {
                let mapped = f(result);
                ParseStatus::Finished {
                    result: mapped,
                    remaining,
                }
            }
        }
    }

    /// Applies `f` to the partial state of an `Incomplete` status, leaving a `Finished` status
    /// untouched.
    pub fn map_state<F, O>(self, f: F) -> ParseStatus<'a, O, R>
    where
        F: FnOnce(P) -> O,
    {
        match self {
            Self::Incomplete {
                new_state,
                required_next,
            } => {
                let mapped = f(new_state);
                ParseStatus::Incomplete {
                    new_state: mapped,
                    required_next,
                }
            }
            Self::Finished { result, remaining } => ParseStatus::Finished { result, remaining },
        }
    }
}
670
/// A parser described as data: literals, numbers, choices and sequences.
#[derive(Clone, Debug)]
pub enum StructureParser {
    /// A literal string.
    Literal(Cow<'static, str>),
    /// A number in the inclusive range `min..=max`.
    Num {
        /// Lower bound of the accepted range.
        min: f64,
        /// Upper bound of the accepted range.
        max: f64,
        /// When `true` the number is handled by the integer parser (bounds are cast to
        /// `i128`), otherwise by the float parser.
        integer: bool,
    },
    /// A choice between two nested parsers.
    Either {
        /// The first alternative.
        first: Box<StructureParser>,
        /// The second alternative.
        second: Box<StructureParser>,
    },
    /// Two nested parsers in sequence.
    Then {
        /// The parser that runs first.
        first: Box<StructureParser>,
        /// The parser that runs after `first`.
        second: Box<StructureParser>,
    },
}
700
701#[allow(missing_docs)]
703#[derive(Debug, PartialEq, Clone)]
704pub enum StructureParserState {
705 Literal(LiteralParserOffset),
706 NumInt(IntegerParserState),
707 Num(FloatParserState),
708 Either(ChoiceParserState<Box<StructureParserState>, Box<StructureParserState>>),
709 Then(SequenceParserState<Box<StructureParserState>, Box<StructureParserState>, ()>),
710}
711
712impl CreateParserState for StructureParser {
713 fn create_parser_state(&self) -> <Self as Parser>::PartialState {
714 match self {
715 StructureParser::Literal(literal) => StructureParserState::Literal(
716 LiteralParser::from(literal.clone()).create_parser_state(),
717 ),
718 StructureParser::Num { min, max, integer } => {
719 if *integer {
720 StructureParserState::NumInt(
721 IntegerParser::new(*min as i128..=*max as i128).create_parser_state(),
722 )
723 } else {
724 StructureParserState::Num(FloatParser::new(*min..=*max).create_parser_state())
725 }
726 }
727 StructureParser::Either { first, second } => {
728 StructureParserState::Either(ChoiceParserState::new(
729 Box::new(first.create_parser_state()),
730 Box::new(second.create_parser_state()),
731 ))
732 }
733 StructureParser::Then { first, .. } => StructureParserState::Then(
734 SequenceParserState::FirstParser(Box::new(first.create_parser_state())),
735 ),
736 }
737 }
738}
739
740impl Parser for StructureParser {
741 type Output = ();
742 type PartialState = StructureParserState;
743
744 fn parse<'a>(
745 &self,
746 state: &Self::PartialState,
747 input: &'a [u8],
748 ) -> ParseResult<ParseStatus<'a, Self::PartialState, Self::Output>> {
749 match (self, state) {
750 (StructureParser::Literal(lit_parser), StructureParserState::Literal(state)) => {
751 LiteralParser::from(lit_parser.clone())
752 .parse(state, input)
753 .map(|result| result.map(|_| ()).map_state(StructureParserState::Literal))
754 }
755 (
756 StructureParser::Num {
757 min,
758 max,
759 integer: false,
760 },
761 StructureParserState::Num(state),
762 ) => FloatParser::new(*min..=*max)
763 .parse(state, input)
764 .map(|result| result.map(|_| ()).map_state(StructureParserState::Num)),
765 (
766 StructureParser::Num {
767 min,
768 max,
769 integer: true,
770 },
771 StructureParserState::NumInt(int),
772 ) => IntegerParser::new(*min as i128..=*max as i128)
773 .parse(int, input)
774 .map(|result| result.map(|_| ()).map_state(StructureParserState::NumInt)),
775 (StructureParser::Either { first, second }, StructureParserState::Either(state)) => {
776 let state = ChoiceParserState {
777 state1: state
778 .state1
779 .as_ref()
780 .map(|state| (**state).clone())
781 .map_err(Clone::clone),
782 state2: state
783 .state2
784 .as_ref()
785 .map(|state| (**state).clone())
786 .map_err(Clone::clone),
787 };
788 let parser = ChoiceParser::new(first.clone(), second.clone());
789 parser.parse(&state, input).map(|result| match result {
790 ParseStatus::Incomplete { required_next, .. } => ParseStatus::Incomplete {
791 new_state: StructureParserState::Either(ChoiceParserState {
792 state1: state.state1.map(Box::new),
793 state2: state.state2.map(Box::new),
794 }),
795 required_next,
796 },
797 ParseStatus::Finished { remaining, .. } => ParseStatus::Finished {
798 result: (),
799 remaining,
800 },
801 })
802 }
803 (StructureParser::Then { first, second }, StructureParserState::Then(state)) => {
804 let state = SequenceParserState::FirstParser(match &state {
805 SequenceParserState::FirstParser(state) => (**state).clone(),
806 SequenceParserState::SecondParser(state, _) => (**state).clone(),
807 });
808 let parser = SequenceParser::new(first.clone(), second.clone());
809 parser.parse(&state, input).map(|result| match result {
810 ParseStatus::Incomplete { required_next, .. } => ParseStatus::Incomplete {
811 new_state: StructureParserState::Then(match state {
812 SequenceParserState::FirstParser(state) => {
813 SequenceParserState::FirstParser(Box::new(state))
814 }
815 SequenceParserState::SecondParser(state, _) => {
816 SequenceParserState::SecondParser(Box::new(state), ())
817 }
818 }),
819 required_next,
820 },
821 ParseStatus::Finished { remaining, .. } => ParseStatus::Finished {
822 result: (),
823 remaining,
824 },
825 })
826 }
827 _ => unreachable!(),
828 }
829 }
830}