minparser/
tools.rs

1/*
2 * Minparser Simple parsing functions
3 *
4 * Copyright (C) 2024-2025 Paolo De Donato
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
18 */
//! Parsing tools.
//!
//! This module provides the [`View`] object, representing a string together with its position
//! inside a file, and the [`Tool`] trait, which is implemented by any object that represents a
//! parsing strategy.
23use crate::pos::{Position};
24
25/// A view on a `str`.
26///
27/// This view carries a [`Position`] with respect of the initial string.
28/// Each time a pattern is matched against a `View` object a new `View` object is returned,
29/// returning the *remaining* part of the view which follows the matched prefix.
30#[derive(Debug, Clone, Copy)]
31pub struct View<'a>{
32    pub(crate) view : &'a str,
33    pub(crate) pos : Position,
34}
35
36impl<'a> From<&'a str> for View<'a> {
37    fn from(s : &'a str) -> Self {
38        Self::new(s)
39    }
40}
41
42impl<'a> View<'a> {
43    /// Creates a new [`View`] object from a string.
44    ///
45    /// The initial position is set at line `0` and column `0`.
46    #[must_use]
47    pub const fn new(view : &'a str) -> Self {
48        Self{
49            view,
50            pos : Position::new_zero(),
51        }
52    }
53    /// Returns the underlying string.
54    #[must_use]
55    pub const fn get_view(&self) -> &'a str{
56        self.view
57    }
58    /// Tests if the underlying string is empty.
59    #[must_use]
60    pub const fn is_empty(&self) -> bool {
61        self.view.is_empty()
62    }
63    /// Returns the position of the first character with respect to the main file.
64    #[must_use]
65    pub const fn top_position(&self) -> &Position {
66        &self.pos
67    }
68    /// Consumes the view and returns the actual [`Position`].
69    #[must_use]
70    pub const fn into_pos(self) -> Position {
71        self.pos
72    }
73    /// Progress the view and its position.
74    ///
75    /// The first element returned is the portion of the string that is skipped.
76    ///
77    /// # Panics
78    /// Panics if `inc` doesn't lie on UTF-8 code point boundaries.
79    #[must_use]
80    pub fn progress(self, inc : usize) -> (&'a str, Self) {
81        if inc == 0 {
82            ("", self)
83        }
84        else{
85            let (pfx, sfx) = self.view.split_at(inc);
86            let mut fit = pfx.split('\n');
87            let mut elem = fit.next().unwrap();// at least one element
88            let mut nls = 0;
89            for el in fit {
90                elem = el;
91                nls += 1;
92            }
93            let (mut r, mut c) = self.pos.unpack();
94            if nls > 0 {
95                c = 0;
96                r += nls;
97            }
98            c += u32::try_from(elem.len()).unwrap();
99            (pfx, Self{
100                pos : Position::new(r, c),
101                view : sfx,
102            })
103        }
104    }
105    /// Match a parsing tool.
106    #[allow(clippy::missing_errors_doc)]
107    pub fn match_tool<T : Tool<'a>>(self, t : &T) -> Result<Self, T::Error> {
108        t.parse(self).map(|i| i.2)
109    }
110    /// Apply a tool that always match.
111    #[allow(clippy::missing_errors_doc)]
112    #[must_use]
113    pub fn match_always<T : AlwaysTool<'a>>(self, t : &T) -> Self {
114        t.parse_always(self).2
115    }
116    /// Matches a parsing tool and returns the matched string.
117    #[allow(clippy::missing_errors_doc)]
118    pub fn match_tool_string<T : Tool<'a>>(self, t : &T) -> Result<(&'a str, Self), T::Error> {
119        let vw = self.get_view();
120        t.parse(self).map(|i| (&vw[0..i.1], i.2))
121    }
122    /// Matches a parsing tool and returns associated data.
123    #[allow(clippy::missing_errors_doc)]
124    pub fn match_tool_data<T : Tool<'a>>(self, t : &T) -> Result<(T::Data, Self), T::Error> {
125        t.parse(self).map(|i| (i.0, i.2))
126    }
127    /// Matches an infallible tool and returns associated data.
128    #[allow(clippy::missing_errors_doc)]
129    pub fn match_always_data<T : AlwaysTool<'a>>(self, t : &T) -> (T::Data, Self) {
130        let i = t.parse_always(self);
131        (i.0, i.2)
132    }
133    /// Matches a parsing tool and returns associated data and the length of the match.
134    #[allow(clippy::missing_errors_doc)]
135    pub fn match_tool_data_len<T : Tool<'a>>(self, t : &T) -> Result<(T::Data, usize, Self), T::Error> {
136        t.parse(self)
137    }
138    /// Matches a tool and applies a transformation on the returned data.
139    #[allow(clippy::missing_errors_doc)]
140    pub fn match_map<D, T, F>(self, t : &T, f : F) -> Result<(D, Self), T::Error> where 
141        T : Tool<'a>,
142        F : FnOnce(T::Data, Position) -> D
143    {
144        self.match_tool_data(t)
145            .map(|(d, s)| (f(d, s.pos), s))
146    }
147    /// Matches a tool and applies a transformation on both the data and the error object.
148    #[allow(clippy::missing_errors_doc)]
149    pub fn match_map_err<D, E, T, F, G>(self, t : &T, f : F, g : G) -> Result<(D, Self), E> where 
150        T : Tool<'a>,
151        F : FnOnce(T::Data, Position) -> D,
152        G : FnOnce(T::Error) -> E 
153    {
154        self.match_tool_data(t)
155            .map(|(d, s)| (f(d, s.pos), s))
156            .map_err(g)
157    }
158    /// Matches a tool only if another tool matches.
159    #[allow(clippy::missing_errors_doc)]
160    pub fn match_if_matches<PRE : Tool<'a>, R : Tool<'a>>(self, pre : &PRE, t : &R) -> Result<Self, R::Error> {
161        self.match_tool(pre).map_or(
162            Ok(self),
163            |next| next.match_tool(t))
164    }
165    /// Match the `th` tool only if `test` tool matches, otherwise execute `els` closure.
166    #[allow(clippy::missing_errors_doc)]
167    pub fn match_if_else_data<PRE : Tool<'a>, R : Tool<'a>, ELS>(self, pre : &PRE, t : &R, els : ELS) -> Result<(R::Data, Self), R::Error> where 
168        ELS : FnOnce(Self) -> Result<(R::Data, Self), R::Error> 
169    {
170        self.match_tool(pre)
171            .map_or_else(
172                |_| els(self),
173                |next|next.match_tool_data(t))
174    }
175    /// Match (with length) the `th` tool only if `test` tool matches, otherwise execute `els` closure.
176    #[allow(clippy::missing_errors_doc)]
177    pub fn match_if_else_data_len<PRE : Tool<'a>, R : Tool<'a>, ELS>(self, pre : &PRE, t : &R, els : ELS) -> Result<(R::Data, usize, Self), R::Error> where 
178        ELS : FnOnce(Self) -> Result<(R::Data, usize, Self), R::Error> 
179    {
180        self.match_tool(pre)
181            .map_or_else(
182                |_| els(self),
183                |next| next.match_tool_data_len(t))
184    }
185}
186
187/// Parsing tool trait.
188///
189/// An object implementing this trait represents a parsing rule that are applied to string
190/// prefixes. If any prefix of a string satisfy this rule then the match is successful and
191/// additional data parsed from the matching prefix is returned. If instead no prefix satisfies the
192/// rule then an error is returned.
193pub trait Tool<'a> {
194    /// Error type.
195    type Error;
196    /// Associated data type.
197    type Data;
198
199    /// The main parsing algorithm.
200    ///
201    /// On a successful match, it additionally returns the parsed data and the length of the match
202    /// in bytes.
203    ///
204    /// # Errors
205    /// If no prefix of `st` satisfies this parsing strategy then `Error` is returned.
206    fn parse(&self, st : View<'a>) -> Result<(Self::Data, usize, View<'a>), Self::Error>;
207}
208/// A parsing tool that always match a prefix.
209///
210/// Any object implementing this trait must also implement the [`Tool`] trait and their
211/// implementation of the [`parse`](Tool::parse) must be equivalent to
212/// `Ok(self.parse_always(st))`.
213///
214/// It is a good practice to set [`core::convert::Infallible`] (or [`!`] when it will become
215/// stable) as `Error`, but it is not mandatory.
216pub trait AlwaysTool<'a> : Tool<'a> {
217    /// The main parsing algorithm.
218    fn parse_always(&self, st : View<'a>) -> (Self::Data, usize, View<'a>);
219
220    /// Automatically discards the generated data.
221    fn parse_always_nodata(&self, st : View<'a>) -> ((), usize, View<'a>) {
222        let (_, l, s) = self.parse_always(st);
223        ((), l, s)
224    }
225}
226
/// Set an arbitrary error type for an [`AlwaysTool`].
///
/// Some objects here require tools with a specific error type. Tools implementing
/// [`AlwaysTool`] never issue an error, therefore it should be possible to pass them to these
/// objects. This wrapper reimplements [`Tool`] and lets you explicitly select any type `E` as the
/// error type.
///
/// The first field is the wrapped tool; the second one is only a type-level marker for `E`.
#[derive(Debug)]
pub struct SetError<T, E>(
    pub T,
    pub ::core::marker::PhantomData<fn() -> E>,
);
234
235impl<T, E> SetError<T, E> {
236    /// Creates a new `SetError`.
237    pub fn new(d : T) -> Self {
238        Self(d, ::core::marker::PhantomData)
239    }
240}
241impl<T : Clone, E> Clone for SetError<T, E> {
242    fn clone(&self) -> Self {
243        Self::new(self.0.clone())
244    }
245}
246impl<T : Copy, E> Copy for SetError<T, E> {}
247impl<T : Default, E> Default for SetError<T, E> {
248    fn default() -> Self {
249        Self::new(T::default())
250    }
251}
252
253impl<T, E, B> AsRef<B> for SetError<T, E> where T : AsRef<B>, B : ?Sized {
254    fn as_ref(&self) -> &B {
255        self.0.as_ref()
256    }
257}
258
259impl<'a, T, E> AlwaysTool<'a> for SetError<T, E> where T : AlwaysTool<'a> {
260    fn parse_always(&self, st : View<'a>) -> (Self::Data, usize, View<'a>) {
261        self.0.parse_always(st)
262    }
263}
264impl<'a, T, E> Tool<'a> for SetError<T, E> where T : AlwaysTool<'a> {
265    type Error = E;
266    type Data = T::Data;
267
268    fn parse(&self, st : View<'a>) -> Result<(Self::Data, usize, View<'a>), Self::Error> {
269        Ok(self.0.parse_always(st))
270    }
271}
272
/// Applies a function to both the data and the error value of a tool.
///
/// The fields are, in order: the wrapped tool, the function applied to its data and the
/// function applied to its error.
#[derive(Debug, Copy, Clone)]
pub struct MapTool<T, FD, FE>(
    pub T,
    pub FD,
    pub FE,
);
276
277impl<'a, T, D, E, FD, FE> Tool<'a> for MapTool<T, FD, FE> where
278    T : Tool<'a>,
279    FD : Fn(T::Data) -> D,
280    FE : Fn(T::Error) -> E
281{
282    type Data = D;
283    type Error = E;
284
285    fn parse(&self, st : View<'a>) -> Result<(Self::Data, usize, View<'a>), Self::Error> {
286        self.0.parse(st)
287            .map(|(d, l, st)| ((self.1)(d), l, st) )
288            .map_err(&self.2)
289    }
290}
291impl<'a, T, D, E, FD, FE> AlwaysTool<'a> for MapTool<T, FD, FE> where
292    T : AlwaysTool<'a>,
293    FD : Fn(T::Data) -> D,
294    FE : Fn(T::Error) -> E
295{
296    fn parse_always(&self, st : View<'a>) -> (Self::Data, usize, View<'a>) {
297        let (d, l, st) = self.0.parse_always(st);
298        ((self.1)(d), l, st)
299    }
300}
301
/// Automatically implements [`Tool`] for an object without templates implementing [`AlwaysTool`].
///
/// `always_impl!(Type)` uses `()` as the associated `Data` type, while
/// `always_impl!(Type, Data)` lets you choose it. In both cases the error type is
/// [`core::convert::Infallible`] and `parse` simply wraps `parse_always` in `Ok`.
///
/// The names `Tool`, `AlwaysTool` and `View` must be in scope where the macro is invoked.
#[macro_export]
macro_rules! always_impl {
    ($i:ident) => {
        // `$crate::` makes the recursive call resolve even when the macro is invoked by path
        // (e.g. `some_crate::always_impl!(X)`) without having been imported.
        $crate::always_impl!($i, ());
    };
    ($i:ident, $t:ty) => {
        impl<'a> Tool<'a> for $i {
            type Error = ::core::convert::Infallible;
            type Data = $t;

            fn parse(&self, st: View<'a>) -> Result<(Self::Data, usize, View<'a>), Self::Error> {
                Ok(self.parse_always(st))
            }
        }
    };
}
319
320impl<'a, T> Tool<'a> for &T where T : Tool<'a> + ?Sized {
321    type Error = T::Error;
322    type Data = T::Data;
323
324    fn parse(&self, st : View<'a>) -> Result<(Self::Data, usize, View<'a>), Self::Error>{
325        T::parse(*self, st)
326    }
327}
328impl<'a, T> AlwaysTool<'a> for &T where T : AlwaysTool<'a> + ?Sized {
329    fn parse_always(&self, st : View<'a>) -> (Self::Data, usize, View<'a>) {
330        T::parse_always(*self, st)
331    }
332}
333
334/// Matches exactly the first character.
335impl<'a> Tool<'a> for char {
336    type Error = View<'a>;
337    type Data = Self;
338
339    fn parse(&self, st : View<'a>) -> Result<(Self::Data, usize, View<'a>), Self::Error>{
340        st.get_view().chars().next()
341            .and_then(|c| if c == *self {
342                Some((c, c.len_utf8(), st.progress(c.len_utf8()).1))
343            }
344            else {
345                None
346            })
347        .ok_or(st)
348    }
349}
350
351/// Matches exactly the string prefix.
352impl<'a> Tool<'a> for str {
353    type Error = View<'a>;
354    #[allow(clippy::needless_borrows_for_generic_args, clippy::use_self)]
355    type Data = &'a str;
356
357    fn parse(&self, st : View<'a>) -> Result<(Self::Data, usize, View<'a>), Self::Error>{
358        if st.get_view().starts_with(self) {
359            Ok((&st.get_view()[0..self.len()], self.len(), st.progress(self.len()).1))
360        }
361        else {
362            Err(st)
363        }
364    }
365}
366
367/// Matches any tool in the given slice.
368///
369/// Tools are evaluated with increasing index ordering, so a later tool would not be tested is a
370/// previous tool has already matched.
371impl<'a, T> Tool<'a> for [T] where T : Tool<'a> {
372    type Data = (usize, T::Data);
373    /// Only the last error is returned.
374    ///
375    /// If you want to keep track of every returned error, use instead multiple nested instances of
376    /// [`Or`](crate::moretools::Or) tool.
377    type Error = Option<T::Error>;
378
379    fn parse(&self, st : View<'a>) -> Result<(Self::Data, usize, View<'a>), Self::Error>{
380        let mut err = None;
381
382        for (i, t) in self.iter().enumerate() {
383            match t.parse(st) {
384                Ok(r) => return Ok(((i, r.0), r.1, r.2)),
385                Err(e) => err = Some(e),
386            }
387        }
388        Err(err)
389    }
390}
391
392/// Matches any tool in the given array.
393///
394/// Tools are evaluated with increasing index ordering, so a later tool would not be tested is a
395/// previous tool has already matched.
396impl<'a, T, const N : usize> Tool<'a> for [T; N] where T : Tool<'a> {
397    type Data = (usize, T::Data);
398    /// Only the last error is returned.
399    ///
400    /// If you want to keep track of every returned error, use instead multiple nested instances of
401    /// [`Or`](crate::moretools::Or) tool.
402    type Error = Option<T::Error>;
403
404    fn parse(&self, st : View<'a>) -> Result<(Self::Data, usize, View<'a>), Self::Error>{
405        let mut err = None;
406
407        for (i, t) in self.iter().enumerate() {
408            match t.parse(st) {
409                Ok(r) => return Ok(((i, r.0), r.1, r.2)),
410                Err(e) => err = Some(e),
411            }
412        }
413        Err(err)
414    }
415}
416
417
418/// A tool that matches the empty prefix.
419#[derive(Debug, Clone, Copy, Eq, PartialEq, Default)]
420pub struct TrueTool;
421
422impl<'a> AlwaysTool<'a> for TrueTool {
423    fn parse_always(&self, st : View<'a>) -> (Self::Data, usize, View<'a>) {
424        ((), 0, st)
425    }
426}
427always_impl!(TrueTool);
428
/// Tool that matches a single character satisfying a predicate.
#[derive(Debug, Clone, Copy)]
pub struct Predicate<P> {
    /// The test applied to the first character of the view.
    pub(crate) predicate: P,
}
434
435impl<P> Predicate<P> {
436    /// Create a new [`Predicate`] from a predicate.
437    pub const fn new(predicate : P) -> Self {
438        Self{
439            predicate,
440        }
441    }
442}
443
444impl<'a, P : Fn(char) -> bool> Tool<'a> for Predicate<P>{
445    type Data = char;
446    type Error = View<'a>;
447
448    fn parse(&self, st : View<'a>) -> Result<(Self::Data, usize, View<'a>), Self::Error>{
449        st.get_view().chars().next().and_then(
450                |c| if (self.predicate)(c) {
451                    Some((c, c.len_utf8(), st.progress(c.len_utf8()).1))
452                }
453                else {
454                    None
455                })
456            .ok_or(st)
457    }
458}
/// Tool that matches a single character through a predicate which also transforms the matched
/// character into some data.
#[derive(Debug, Clone, Copy)]
pub struct PredicateData<P> {
    /// The function applied to the first character of the view.
    pub(crate) predicate: P,
}
464
465impl<P> PredicateData<P> {
466    /// Create a new [`PredicateData`] from a predicate.
467    pub const fn new(predicate : P) -> Self {
468        Self{
469            predicate,
470        }
471    }
472}
473
474impl<D, P : Fn(char) -> Option<D> > PredicateData<P> {
475    /// Tests if the first character satisfy the predicate, and in the affirmative case the matched
476    /// character and its length are returned.
477    pub fn parse_char(&self, st : &str) -> Option<(D, usize)> {
478        st.chars().next().and_then(|c| {
479            (self.predicate)(c).map(|d| (d, c.len_utf8()))
480        })
481    }
482}
483impl<'a, D, P : Fn(char) -> Option<D>> Tool<'a> for PredicateData<P>{
484    type Data = D;
485    type Error = View<'a>;
486
487    fn parse(&self, st : View<'a>) -> Result<(Self::Data, usize, View<'a>), Self::Error>{
488        st.get_view().chars().next().and_then(
489                |c| (self.predicate)(c).map(|d| (d, c.len_utf8())))
490            .map(|(d, l)| (d, l, st.progress(l).1))
491            .ok_or(st)
492    }
493}
494
495/// Matches any single character.
496///
497/// It is exactly the opposite of [`EOFTool`].
498#[derive(Debug, Copy, Clone, Default)]
499pub struct AnyChar;
500
501impl<'a> Tool<'a> for AnyChar {
502    type Data = char;
503    type Error = View<'a>;
504
505    fn parse(&self, st : View<'a>) -> Result<(Self::Data, usize, View<'a>), Self::Error>{
506        st.get_view().chars().next()
507            .map(|c| (c, c.len_utf8(), st.progress(c.len_utf8()).1))
508            .ok_or(st)
509    }
510}
511
512/// Parses only the end of the input.
513///
514/// ```rust
515/// use minparser::prelude::*;
516/// let st = View::from("My data ");
517/// st.match_tool(&"My data ").unwrap().match_tool(&EOFTool).unwrap();
518/// ```
519#[derive(Debug, Clone, Copy, Default)]
520pub struct EOFTool;
521
522impl<'a> Tool<'a> for EOFTool{
523    type Data = ();
524    type Error = View<'a>;
525
526    fn parse(&self, st : View<'a>) -> Result<(Self::Data, usize, View<'a>), Self::Error>{
527        if st.is_empty() {
528            Ok(((), 0, st))
529        }
530        else{
531            Err(st)
532        }
533    }
534}
535
/// Discards empty strings from a match.
///
/// Some atoms that need to match an undefined number of other atoms (like
/// [`Repeat`](crate::prelude::Repeat) or [`LazyRepeat`](crate::prelude::LazyRepeat))
/// may enter an infinite loop if the inner atom matches the empty string `""`.
/// In that case there is always a match but the atom does not progress, which results in an
/// endless cycle.
///
/// This tool takes another tool and converts any match of an empty string into a missing match,
/// avoiding this issue.
#[derive(Debug, Clone, Copy, Default)]
pub struct NonEmpty<P>(
    pub P,
);
548
549impl<'a, P> Tool<'a> for NonEmpty<P> where P : Tool<'a> {
550    type Data = P::Data;
551    type Error = Option<P::Error>;
552
553    fn parse(&self, st : View<'a>) -> Result<(Self::Data, usize, View<'a>), Self::Error>{
554        self.0.parse(st)
555            .map_err(Some)
556            .and_then(|d| if d.1 > 0 {Ok(d)} else {Err(None)})
557    }
558}