1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
//! Zero-dependency library for writing parsers in a concise functional style & with exhaustive
//! error-reporting.
//!
//! Kinds of errors are distinguished via a user-defined `Expectation` type, which signals what did
//! a parser expect.
//! A [`ParsingError`] can also have no expectation, which will mean that the error is recoverable.
//! Some built-in parsers can have `Infallible` as their expectation, which means that any error
//! the parser may ever return is recoverable.
//! The distinction between recoverable & fatal errors is important for parsers that need to try
//! multiple options.
//!
//! Error reporting with precise location in the source is facilitated by methods such as
//! [`Parser::parse_with_err_loc`], [`ParsingError::with_src_loc`]

pub mod tuple;

use core::{fmt::{Display, Formatter}, num::NonZeroU32};
use core::{convert::Infallible, fmt::Debug};
use std::{error::Error, path::Path};

use tuple::{first, map_second, rev, second, tuple, Tuple};

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Location {
    line: NonZeroU32,
    col: u32,
}

impl Default for Location {
    fn default() -> Self {
        Self {
            line: unsafe { NonZeroU32::new(1).unwrap_unchecked() },
            col: 0
        }
    }
}

impl Display for Location {
    fn fmt(&self, f: &mut Formatter) -> core::fmt::Result {
        write!(f, "{}:{}", self.line, self.col)
    }
}

#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct ParsingError<'rest, Expectation> {
    rest: &'rest str,
    /// `None` means that the error is recoverable
    expected: Option<Expectation>,
}

#[derive(Debug)]
pub struct FullParsingError<'path, Expectation> {
    at: Location,
    path: &'path Path,
    expected: Option<Expectation>,
}

impl<Expectation: Display> Display for FullParsingError<'_, Expectation> {
    fn fmt(&self, f: &mut Formatter) -> core::fmt::Result {
        write!(f, "parsing error at {}:{}", self.path.display(), self.at)?;
        if let Some(expected) = &self.expected {
            write!(f, ": {expected}")?;
        }
        Ok(())
    }
}

impl<Expectation: Error> Error for FullParsingError<'_, Expectation> {}

impl<'rest, Expectation> ParsingError<'rest, Expectation> {
    pub const fn new(rest: &'rest str, expected: Expectation) -> Self {
        Self { rest, expected: Some(expected) }
    }

    pub const fn new_recoverable(rest: &'rest str) -> Self {
        Self { rest, expected: None }
    }

    pub const fn is_recoverable(&self) -> bool {
        self.expected.is_none()
    }

    pub fn expectation<NewExpectation>(self, expected: NewExpectation)
        -> ParsingError<'rest, NewExpectation>
    {
        ParsingError { expected: Some(expected), rest: self.rest }
    }

    pub fn narrow_expectation<NewExpectation>(self, expected: NewExpectation)
        -> ParsingError<'rest, NewExpectation>
    {
        ParsingError { expected: self.expected.map(|_| expected), rest: self.rest }
    }

    pub fn with_src_loc<'path>(self, path: &'path Path, src: &str)
        -> FullParsingError<'path, Expectation>
    {
        let Some(progress) = src.len().checked_sub(self.rest.len()) else {
            return FullParsingError { at: Location::default(), expected: self.expected, path }
        };

        FullParsingError {
            at: src.bytes().take(progress).fold(Location::default(), |loc, b| match b {
                b'\n' => Location { line: loc.line.saturating_add(1), col: 0 },
                _ => Location { col: loc.col.saturating_add(1), ..loc },
            }),
            expected: self.expected,
            path,
        }
    }
}

/// The result of a parser.
pub type ParsingResult<'src, T = (), Expectation = Infallible> = core::result::Result<
    (&'src str, T),
    ParsingError<'src, Expectation>,
>;

/// The core of the crate, a trait representing a function that takes some string as input and
/// returns either a tuple of (the rest of the input, the output) or a [`ParsingError`].
pub trait Parser<'input, T = (), Expectation = Infallible>:
    Sized + FnOnce(&'input str) -> ParsingResult<'input, T, Expectation>
{
    /// Turns output into a recoverable error if the output doesn't meet a condition.
    fn filter(self, f: impl FnOnce(&T) -> bool)
        -> impl FnOnce(&'input str)
        -> ParsingResult<'input, T, Expectation>
    {
        move |input| match self(input) {
            Ok((rest, res)) if f(&res) => Ok((rest, res)),
            Ok(_) => Err(ParsingError::new_recoverable(input)),
            Err(err) => Err(err),
        }
    }

    /// Transforms the first output of the parser, if present.
    fn map<U>(self, f: impl FnOnce(T) -> U)
        -> impl FnOnce(&'input str)
        -> ParsingResult<'input, U, Expectation>
    {
        move |input| self(input).map(map_second(f))
    }

    /// Replaces a recoverable error with the result of `parser`.
    fn or(self, parser: impl FnOnce(&'input str) -> ParsingResult<'input, T, Expectation>)
        -> impl FnOnce(&'input str)
        -> ParsingResult<'input, T, Expectation>
    {
        move |input| match self(input) {
            Ok(res) => Ok(res),
            Err(err) if err.is_recoverable() => parser(input),
            Err(err) => Err(err),
        }
    }

    /// Replaces a recoverable error with the transformed remains of the input.
    /// The returned remains of the input are an empty string.
    fn or_map_rest(self, f: impl FnOnce(&'input str) -> T)
        -> impl FnOnce(&'input str)
        -> ParsingResult<'input, T, Expectation>
    {
        move |input| match self(input) {
            Ok(res) => Ok(res),
            Err(err) if err.is_recoverable() => Ok(("", f(err.rest))),
            Err(err) => Err(err),
        }
    }

    /// Replaces a recoverable error with `value` & the rest of the input in the recoverable error.
    fn or_value(self, value: T)
        -> impl FnOnce(&'input str)
        -> ParsingResult<'input, T, Expectation>
    {
        move |input| match self(input) {
            Ok(res) => Ok(res),
            Err(err) if err.is_recoverable() => Ok((err.rest, value)),
            Err(err) => Err(err),
        }
    }

    /// Parses the rest of the input after the first parser, returning both outputs
    /// & short-circuiting on an error.
    /// See also [`Parser::add`].
    fn and<U>(self, parser: impl FnOnce(&'input str) -> ParsingResult<'input, U, Expectation>)
        -> impl FnOnce(&'input str)
        -> ParsingResult<'input, (T, U), Expectation>
    {
        move |input| {
            let (rest, out) = self(input)?;
            let (rest, new_out) = parser(rest)?;
            Ok((rest, (out, new_out)))
        }
    }

    /// Like [`Parser::and`], but specific to parsers that output a tuple:
    /// the new output is appended to the tuple of other tuples using the [`Tuple`] trait.
    fn add<U>(self, parser: impl FnOnce(&'input str) -> ParsingResult<'input, U, Expectation>)
        -> impl FnOnce(&'input str)
        -> ParsingResult<'input, T::Appended<U>, Expectation>
    where
        T: Tuple,
    {
        move |input| {
            let (rest, out) = self(input)?;
            let (rest, new_out) = parser(rest)?;
            Ok((rest, out.append(new_out)))
        }
    }

    /// Like [`Parser::and`], but discards the output of the first parser.
    fn then<U>(self, parser: impl FnOnce(&'input str) -> ParsingResult<'input, U, Expectation>)
        -> impl FnOnce(&'input str)
        -> ParsingResult<'input, U, Expectation>
    {
        move |input| {
            let rest = self(input)?.0;
            let (rest, out) = parser(rest)?;
            Ok((rest, out))
        }
    }

    /// Same as [`Parser::and`] but discards the output of the second parser
    fn skip<U>(self, parser: impl FnOnce(&'input str) -> ParsingResult<'input, U, Expectation>)
        -> impl FnOnce(&'input str)
        -> ParsingResult<'input, T, Expectation>
    {
        move |input| {
            let (rest, out) = self(input)?;
            let rest = parser(rest)?.0;
            Ok((rest, out))
        }
    }

    /// Same as [`Parser::skip`] but discards the error of the second parser as well.
    /// Effectively, all this function does is advance the input to right after the second parser,
    /// if it succeeds, otherwise the input stays as if only the first parser was called.
    fn maybe_skip<U>(self, parser: impl FnOnce(&'input str) -> ParsingResult<'input, U, Expectation>)
        -> impl FnOnce(&'input str)
        -> ParsingResult<'input, T, Expectation>
    {
        move |input| {
            let (rest, out) = self(input)?;
            let rest = parser(rest).map_or(rest, first);
            Ok((rest, out))
        }
    }

    /// Sets the expectation from the parser, making all errors unrecoverable.
    fn expect<NewExpectation>(self, expected: NewExpectation)
        -> impl FnOnce(&'input str)
        -> ParsingResult<'input, T, NewExpectation>
    {
        move |input| self(input).map_err(|e| e.expectation(expected))
    }

    /// Changes the expectation from the parser, unless the error is recoverable.
    fn narrow_expectation<NewExpectation>(self, expected: NewExpectation)
        -> impl FnOnce(&'input str)
        -> ParsingResult<'input, T, NewExpectation>
    {
        move |input| self(input).map_err(|e| e.narrow_expectation(expected))
    }

    /// Adds the part of the input that was consumed by the parser to the outputs.
    /// If the input increased in length after the parser (which should not happen), an empty
    /// string is added.
    /// See also [`Parser::add_span`], which adds the span to the tuple of other outputs.
    fn get_span(self)
        -> impl FnOnce(&'input str)
        -> ParsingResult<'input, (T, &'input str), Expectation>
    {
        self.map(tuple).add_span()
    }

    /// Like [`Parser::get_span`], but adds the output to the tuple of other outputs using the
    /// [`Tuple`] trait.
    fn add_span(self)
        -> impl FnOnce(&'input str)
        -> ParsingResult<'input, T::Appended<&'input str>, Expectation>
    where
        T: Tuple,
    {
        move |input| {
            let (rest, out) = self(input)?;
            let consumed = unsafe {
                input.get_unchecked(.. input.len().saturating_sub(rest.len())) 
            };
            Ok((rest, out.append(consumed)))
        }
    }

    /// Repeats the parser until a recoverable error is met, discarding all the output.
    /// Beware parsers with non-trivially cloneable captured variables: the parser is called
    /// repeatedly by being cloned.
    fn repeat(self) -> impl FnOnce(&'input str) -> ParsingResult<'input, (), Expectation>
    where
        Self: Clone
    {
        move |input| loop {
            match self.clone()(input) {
                Ok(_) => (),
                Err(err) if err.is_recoverable() => return Ok((err.rest, ())),
                Err(err) => return Err(err),
            }
        }
    }

    /// Calls the parser and augments the parsing error, if present, with location in the input.
    /// `path` is the reported path to the file where the error occured.
    fn parse_with_err_loc<'path>(self, path: &'path Path, input: &'input str)
        -> Result<T, FullParsingError<'path, Expectation>>
    {
        self(input).map_err(|e| e.with_src_loc(path, input)).map(second)
    }
}

impl<'input, T, Expectation, F> Parser<'input, T, Expectation> for F
where
    F: FnOnce(&'input str) -> ParsingResult<'input, T, Expectation>,
{}

/// Strips any 1 character from the input.
/// Returns the parsed character or a recoverable error.
/// See also [`parse_char`]
pub fn parse_any_char(input: &str) -> ParsingResult<char> {
    let mut iter = input.chars();
    iter.next().map(|ch| (iter.as_str(), ch)).ok_or(ParsingError::new_recoverable(input))
}

/// Strips exactly 1 character `ch` from the input.
/// Returns `ch` or a recoverable error.
/// See also [`parse_any_char`]
pub fn parse_char<'input>(ch: char) -> impl Parser<'input, char> {
    parse_any_char.filter(move |x| *x == ch)
}

/// Strips exactly 1 string `prefix` from the input.
/// Returns `prefix` or a recoverable error.
pub fn parse_exact(prefix: &str) -> impl Parser<&str> {
    move |input| prefix.strip_prefix(prefix)
        .map(|rest| (rest, prefix))
        .ok_or(ParsingError::new_recoverable(input))
}

/// Strips characters from the input until `pred` returns `true`, i.e. while it returns `false`.
/// Returns the consumed string slice or a recoverable error.
/// See also [`parse_while`]
pub fn parse_until<'input>(pred: impl Fn(&char) -> bool) -> impl Parser<'input, &'input str> {
    move |input| input.find(|c| pred(&c))
        .map(|id| input.split_at(id).rev())
        .ok_or(ParsingError::new_recoverable(input))
}

/// Strips characters from the input until `delimiter` is met.
/// The `delimiter` is omitted from both the output and the rest of the input.
/// Returns the string consumed before the `delimiter` or a recoverable error.
pub fn parse_until_exact(delimiter: &str) -> impl Parser<&str> {
    move |input| input.split_once(delimiter)
        .map(rev)
        .ok_or(ParsingError::new_recoverable(input))
}

/// The output is a string up to the point where `pred` returned `true`
/// any returned error is recoverable
pub fn parse_while<'input>(pred: impl Fn(&char) -> bool) -> impl Parser<'input, &'input str> {
    parse_until(move |c| !pred(c))
}

/// Parse a balanced group of `open` & `close` characters.
/// Returns the group without the initial `open` & the final `close`, or:
/// - If no initial `open` was found, a recoverable error is returned.
/// - If the end was reached before a matching `close` character, a fatal error is returned.
/// An example use of this is parsing balanced parentheses:
/// ```rust
/// # fn main() {
/// use shrimple_parsing::{parse_group, ParsingError};
/// let input = "(foo) bar";
/// assert_eq!(parse_group('(', ')')(input), Ok((" bar", "foo")));
/// let input = "(oops";
/// assert_eq!(parse_group('(', ')')(input), Err(ParsingError::new("(oops", ())));
/// # }
/// ```
pub fn parse_group<'input>(open: char, close: char) -> impl Parser<'input, &'input str, ()> {
    move |original_input| {
        let (input, _) = parse_char(open).expect(())(original_input)?;
        let mut depth = 0usize;
        for (at, ch) in input.char_indices() {
            if ch == close {
                if depth == 0 {
                    let (res, src) = input.split_at(at);
                    return Ok((&src[1..], res));
                }
                depth -= 1
            } else if ch == open {
                depth += 1
            }
        }
        Err(ParsingError::new(original_input, ()))
    }
}