aott/
input.rs

1#![allow(dead_code)]
2use crate::{
3        error::{Error, Located, Span},
4        extra,
5        parser::{Parser, ParserExtras},
6        text::Char,
7};
8use core::{
9        hash::Hash,
10        ops::{Range, RangeFrom},
11};
12use num_traits::{One, Zero};
13
14#[allow(clippy::module_name_repetitions)]
15pub trait InputType {
16        #[doc(hidden)]
17        type Offset: Copy + Hash + Ord + Into<usize> + Zero + One;
18        type Token;
19
20        #[doc(hidden)]
21        fn start(&self) -> Self::Offset;
22
23        /// # Safety
24        /// If `offset` is not strictly the one provided by `Self::start` or returned as the first tuple value from this function,
25        /// calling `next` is undefined behavior. It may index memory outside of the desired range, it may segfault, it may panic etc. etc.
26        /// Stay safe and don't use this api unless you want to explode.
27        #[doc(hidden)]
28        unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option<Self::Token>);
29
30        #[doc(hidden)]
31        fn prev(offset: Self::Offset) -> Self::Offset;
32}
33
34impl<'a> InputType for &'a str {
35        type Token = char;
36        type Offset = usize;
37
38        #[inline]
39        fn start(&self) -> Self::Offset {
40                0
41        }
42
43        fn prev(offset: Self::Offset) -> Self::Offset {
44                offset.saturating_sub(1)
45        }
46
47        #[inline(always)]
48        unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option<Self::Token>) {
49                if offset < self.len() {
50                        // SAFETY: `offset < self.len()` above guarantees offset is in-bounds
51                        //         We only ever return offsets that are at a character boundary
52                        let c = unsafe {
53                                self.get_unchecked(offset..)
54                                        .chars()
55                                        .next()
56                                        .unwrap_unchecked()
57                        };
58                        (offset + c.len_utf8(), Some(c))
59                } else {
60                        (offset, None)
61                }
62        }
63}
64
65impl<'a, T: Clone> InputType for &'a [T] {
66        type Offset = usize;
67        type Token = T;
68
69        unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option<Self::Token>) {
70                if offset < self.len() {
71                        // SAFETY: `offset < self.len()` above guarantees offset is in-bounds
72                        //         We only ever return offsets that are at a character boundary
73                        let tok = unsafe { self.get_unchecked(offset) };
74                        (offset + 1, Some(tok.clone()))
75                } else {
76                        (offset, None)
77                }
78        }
79
80        fn start(&self) -> Self::Offset {
81                0
82        }
83
84        fn prev(offset: Self::Offset) -> Self::Offset {
85                offset.saturating_sub(1)
86        }
87}
88
89#[doc(hidden)]
90pub trait ExactSizeInput: InputType {
91        unsafe fn span_from(&self, range: RangeFrom<Self::Offset>) -> Range<usize>;
92}
93
94#[doc(hidden)]
95#[derive(Debug)]
96pub struct Errors<T, E> {
97        pub alt: Option<Located<T, E>>,
98        pub secondary: Vec<Located<T, E>>,
99}
100
101impl<T, E> Default for Errors<T, E> {
102        fn default() -> Self {
103                Self {
104                        alt: None,
105                        secondary: vec![],
106                }
107        }
108}
109
110pub struct InputOwned<I: InputType, E: ParserExtras<I> = extra::Err<I>> {
111        pub(crate) input: I,
112        pub(crate) cx: E::Context,
113        errors: Errors<I::Offset, E::Error>,
114}
115
116impl<I: InputType, E: ParserExtras<I>> InputOwned<I, E> {
117        pub fn from_input(input: I) -> Self
118        where
119                E::Context: Default,
120        {
121                Self {
122                        input,
123                        cx: E::Context::default(),
124                        errors: Errors::default(),
125                }
126        }
127        pub fn as_ref_at_zero(&mut self) -> Input<'_, I, E> {
128                Input {
129                        offset: self.input.start(),
130                        input: &self.input,
131                        cx: &self.cx,
132                }
133        }
134        pub fn as_ref_at(&mut self, offset: I::Offset) -> Input<'_, I, E> {
135                Input {
136                        offset,
137                        input: &self.input,
138                        cx: &self.cx,
139                }
140        }
141}
142
143/// **Warning** `InputOwned` and `Input` are an unstable API.
144/// This could change at any time without notice.
145/// Please consider using primitives like `any` over functions in this struct. Please.
146/// If you do, support is not guaranteed.
147/// Changing the `offset` to arbitrary values could lead to undefined behavior. Don't modify anything in this struct if you want to be free of UB and/or segfaults.
148#[derive(Debug)]
149pub struct Input<'parse, I: InputType, E: ParserExtras<I> = extra::Err<I>> {
150        #[doc(hidden)]
151        pub offset: I::Offset,
152        #[doc(hidden)]
153        pub input: &'parse I,
154        // #[doc(hidden)]
155        // pub errors: &'parse mut Errors<I::Offset, E::Error>,
156        // pub(crate) state: &'parse mut E::State,
157        #[doc(hidden)]
158        pub(crate) cx: &'parse E::Context,
159}
160
161impl<'parse, I: InputType, E: ParserExtras<I, Context = ()>> Input<'parse, I, E> {
162        pub fn new(input: &'parse I) -> Self {
163                Self {
164                        offset: input.start(),
165                        input,
166                        cx: &(),
167                }
168        }
169}
170
171impl<'parse, I: InputType, E: ParserExtras<I>> Input<'parse, I, E> {
172        #[inline]
173        pub(crate) fn skip_while(&mut self, mut f: impl FnMut(&I::Token) -> bool) {
174                loop {
175                        // SAFETY: offset was generated by previous call to `Input::next`
176                        let (offset, token) = unsafe { self.input.next(self.offset) };
177                        if token.filter(&mut f).is_none() {
178                                break;
179                        }
180                        self.offset = offset;
181                }
182        }
183
184        #[inline(always)]
185        pub(crate) fn next_inner(&mut self) -> (I::Offset, Option<I::Token>) {
186                // SAFETY: offset was generated by previous call to `Input::next`
187                let (offset, token) = unsafe { self.input.next(self.offset) };
188                self.offset = offset;
189                (self.offset, token)
190        }
191
192        /// # Panics
193        /// A parser, if it returns an error, must put the error into input.errors.alt.
194        /// If a parser returns Err(()), but there is no error in .errors.alt, a panic happens.
195        pub fn parse<O, P: Parser<I, O, E> + ?Sized>(
196                self,
197                parser: &P,
198        ) -> Result<(Self, O), (Self, E::Error)> {
199                parser.parse(self)
200        }
201
202        pub fn check<O, P: Parser<I, O, E>>(
203                self,
204                parser: &P,
205        ) -> Result<(Self, ()), (Self, E::Error)> {
206                parser.check(self)
207        }
208        /// Save the current parse state as a [`Marker`].
209        ///
210        /// You can rewind back to this state later with [`InputRef::rewind`].
211        #[inline(always)]
212        pub fn save(&self) -> Marker<I> {
213                Marker {
214                        offset: self.offset,
215                        err_count: 0, //self.errors.secondary.len(),
216                }
217        }
218
219        /// Reset the parse state to that represented by the given [`Marker`].
220        ///
221        /// You can create a marker with which to perform rewinding using [`InputRef::save`].
222        /// Using a marker from another input is UB. Your parser may explode. You may get a panic.
223        #[inline(always)]
224        pub fn rewind(&mut self, marker: Marker<I>) {
225                // self.errors.secondary.truncate(marker.err_count);
226                self.offset = marker.offset;
227        }
228
229        /// Get the next token in the input by value. Returns `None` if the end of the input has been reached.
230        #[inline(always)]
231        pub fn next(&mut self) -> Option<I::Token> {
232                self.next_inner().1
233        }
234        /// Peek the next token in the input. Returns `None` if the end of the input has been reached.
235        #[inline(always)]
236        pub fn peek(&self) -> Option<I::Token> {
237                // SAFETY: offset was generated by previous call to `Input::next`
238                unsafe { self.input.next(self.offset).1 }
239        }
240        #[inline(always)]
241        pub fn span_since(&self, before: I::Offset) -> Range<I::Offset> {
242                before..self.offset
243        }
244        #[inline(always)]
245        pub fn next_or_eof(&mut self) -> Result<I::Token, E::Error> {
246                let befunge = self.offset;
247                match self.next_inner() {
248                        (_, Some(token)) => Ok(token),
249                        (_, None) => Err(Error::unexpected_eof(
250                                Span::new_usize(self.span_since(befunge)),
251                                None,
252                        )),
253                }
254        }
255}
256
257#[derive(Debug)]
258pub struct Marker<I: InputType> {
259        pub offset: I::Offset,
260        err_count: usize,
261}
262impl<I: InputType> Clone for Marker<I> {
263        fn clone(&self) -> Self {
264                *self
265        }
266}
267impl<I: InputType> Copy for Marker<I> {}
268
269/// Implemented by inputs that represent slice-like streams of input tokens.
270pub trait SliceInput<'a>: ExactSizeInput {
271        /// The unsized slice type of this input. For [`&str`] it's `&'a str`, and for [`&[T]`] it will be `&'a [T]`.
272        type Slice: 'a;
273
274        /// Get the full slice of the input
275        #[doc(hidden)]
276        fn full_slice(&self) -> Self::Slice;
277
278        /// Get a slice from a start and end offset
279        // TODO: Make unsafe
280        #[doc(hidden)]
281        fn slice(&self, range: Range<Self::Offset>) -> Self::Slice;
282
283        /// Get a slice from a start offset till the end of the input
284        // TODO: Make unsafe
285        #[doc(hidden)]
286        fn slice_from(&self, from: RangeFrom<Self::Offset>) -> Self::Slice;
287}
288
289pub trait StrInput<'a, C: Char>:
290        InputType<Offset = usize, Token = C> + SliceInput<'a, Slice = &'a C::Str>
291{
292}
293impl<'a> ExactSizeInput for &'a str {
294        #[inline(always)]
295        unsafe fn span_from(&self, range: RangeFrom<Self::Offset>) -> Range<Self::Offset> {
296                range.start..self.len()
297        }
298}
299impl<'a, T: Clone> ExactSizeInput for &'a [T] {
300        #[inline(always)]
301        unsafe fn span_from(&self, range: RangeFrom<Self::Offset>) -> Range<Self::Offset> {
302                range.start..self.len()
303        }
304}
305// impl<'a, T: Clone + 'a, const N: usize> ExactSizeInput for &'a [T; N] {
306//         #[inline(always)]
307//         unsafe fn span_from(&self, range: RangeFrom<Self::Offset>) -> Range<Self::Offset> {
308//                 (range.start..N).into()
309//         }
310// }
311impl<'a> StrInput<'a, char> for &'a str {}
312
313impl<'a> SliceInput<'a> for &'a str {
314        type Slice = &'a str;
315
316        #[inline(always)]
317        fn full_slice(&self) -> Self::Slice {
318                *self
319        }
320
321        #[inline(always)]
322        fn slice(&self, range: Range<Self::Offset>) -> Self::Slice {
323                &self[range]
324        }
325
326        #[inline(always)]
327        fn slice_from(&self, from: RangeFrom<Self::Offset>) -> Self::Slice {
328                &self[from]
329        }
330}
331
332impl<'a> StrInput<'a, u8> for &'a [u8] {}
333
334impl<'a, T: Clone> SliceInput<'a> for &'a [T] {
335        type Slice = &'a [T];
336
337        #[inline(always)]
338        fn full_slice(&self) -> Self::Slice {
339                *self
340        }
341
342        #[inline(always)]
343        fn slice(&self, range: Range<Self::Offset>) -> Self::Slice {
344                &self[range]
345        }
346
347        #[inline(always)]
348        fn slice_from(&self, from: RangeFrom<Self::Offset>) -> Self::Slice {
349                &self[from]
350        }
351}