nom_supreme/
final_parser.rs

1//! Entry point layer into nom parsers. See [`final_parser`] for details.
2
3use core::fmt::{self, Display, Formatter};
4
5use nom::{
6    error::{Error, ErrorKind, ParseError},
7    Err as NomErr, InputLength, Offset, Parser,
8};
9
10use nom::error::VerboseError;
11
12use crate::parser_ext::ParserExt;
13
/// A type that can describe the absolute position of a parse error within
/// an input.
///
/// nom errors carry the *tail* of the input at the point where the error was
/// raised. Implementors of this trait combine that tail with the complete
/// original input to produce a self-contained "full location context" that
/// is suitable for reporting. `nom-supreme` provides two such types:
/// [`Location`], which records the line and column number of the error, and
/// [`ByteOffset`], which records its byte offset.
///
/// This trait is normally used through [`ExtractContext`], which nom error
/// types implement in order to convert their stored input tails into
/// location contexts. [`ExtractContext`] is in turn used by [`final_parser`]
/// to build and return parse errors with the full context attached.
pub trait RecreateContext<I>
where
    Self: Sized,
{
    /// Rebuild the context from the *original* input and the `tail` that nom
    /// reported alongside the error.
    ///
    /// Implementations may return garbage or panic if `tail` is not
    /// associated with `original_input`.
    fn recreate_context(original_input: I, tail: I) -> Self;
}
36
37impl<I> RecreateContext<I> for I {
38    fn recreate_context(_original_input: I, tail: I) -> Self {
39        tail
40    }
41}
42
/// A byte offset into the input where an error may have occurred
///
/// Offsets compare and hash exactly like the `usize` they wrap, so they can
/// be sorted or used as keys when collecting several errors.
///
/// # `&str` Example
///
/// ```
/// use nom_supreme::final_parser::{ByteOffset, RecreateContext};
///
/// let original_input = "Hello,\nWorld!\n";
/// let tail = &original_input[9..];
///
/// assert_eq!(
///    ByteOffset::recreate_context(original_input, tail),
///    ByteOffset(9),
/// );
/// ```
///
/// # `&[u8]` Example
///
/// ```
/// use nom_supreme::final_parser::{ByteOffset, RecreateContext};
///
/// let original_input = b"GET / HTTP/1.1\r\n";
/// let tail = &original_input[6..];
///
/// assert_eq!(
///    ByteOffset::recreate_context(&original_input[..], tail),
///    ByteOffset(6),
/// );
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ByteOffset(pub usize);
74
75impl<I: Offset> RecreateContext<I> for ByteOffset {
76    fn recreate_context(original_input: I, tail: I) -> Self {
77        ByteOffset(original_input.offset(&tail))
78    }
79}
80
/// A location in a string where an error may have occurred. In keeping with
/// the typical practice from editors and other tools, lines and columns are
/// both 1-indexed.
///
/// Locations are ordered by line first and column second, and can be hashed,
/// so a batch of errors can be sorted or de-duplicated by position.
///
/// # Example
///
/// ```
/// use nom_supreme::final_parser::{RecreateContext, Location};
///
/// let original_input = "Hello,\nWorld!\n";
/// let tail = &original_input[9..];
///
/// assert_eq!(
///    Location::recreate_context(original_input, tail),
///    Location { line: 2, column: 3 },
/// );
/// ```
// Field order matters: the derived `PartialOrd`/`Ord` compare `line` before
// `column`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Location {
    /// The line number in the original input where the error occurred
    pub line: usize,

    /// The column of the line in the original input where the error occurred
    pub column: usize,
}
106
107impl Location {
108    /// Given the *original* input string, as well as the context reported by
109    /// nom, compute the location in the original string where the error
110    /// occurred.
111    ///
112    /// This function will report garbage (and may panic) if the context is not
113    /// associated with the input
114    pub fn locate_tail<'a>(original_input: &'a str, tail: &'a str) -> Self {
115        let offset = original_input.offset(tail);
116        let input_bytes = original_input.as_bytes();
117        let prefix = &input_bytes[..offset];
118
119        let line_number = memchr::memchr_iter(b'\n', prefix).count() + 1;
120
121        let last_line_start = memchr::memrchr(b'\n', prefix).map(|i| i + 1).unwrap_or(0);
122        let last_line = &prefix[last_line_start..];
123        let column_number = last_line.len() + 1;
124
125        Location {
126            line: line_number,
127            column: column_number,
128        }
129    }
130}
131
132impl Display for Location {
133    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
134        if f.alternate() {
135            write!(f, "line {}, column {}", self.line, self.column)
136        } else {
137            write!(f, "{}:{}", self.line, self.column)
138        }
139    }
140}
141
142impl RecreateContext<&str> for Location {
143    fn recreate_context(original_input: &str, tail: &str) -> Self {
144        Location::locate_tail(original_input, tail)
145    }
146}
147
/// Trait for recombining error information with the original input.
///
/// nom errors carry context in the form of the input tail that marks where
/// the error happened. This trait takes that context and recombines it with
/// the *original* input to produce an error holding something more useful
/// for error reporting.
///
/// Typically, an implementation looks like `ExtractContext<I, E<T>> for E<I>`.
/// This indicates that some error type `E`, which is generic over the input
/// type, can be converted into another variant of that error, using `T`
/// instead of `I` to hold the resulting context. Often this context
/// conversion can be done with [`RecreateContext`].
pub trait ExtractContext<I, T> {
    /// Given the context attached to a nom error (`self`) and the *original*
    /// input to the nom parser, extract the more useful context information.
    ///
    /// For example, for a string, one possible context extraction would be
    /// the Location (line and column number) in the original input where the
    /// error indicated by `self` occurred.
    fn extract_context(self, original_input: I) -> T;
}
169
170impl<I, T> ExtractContext<I, ()> for T {
171    fn extract_context(self, _original_input: I) {}
172}
173
174impl<I, T> ExtractContext<I, (T, ErrorKind)> for (I, ErrorKind)
175where
176    T: RecreateContext<I>,
177{
178    fn extract_context(self, original_input: I) -> (T, ErrorKind) {
179        let (tail, kind) = self;
180        (T::recreate_context(original_input, tail), kind)
181    }
182}
183
184impl<I, T> ExtractContext<I, Error<T>> for Error<I>
185where
186    T: RecreateContext<I>,
187{
188    fn extract_context(self, original_input: I) -> Error<T> {
189        Error::new(T::recreate_context(original_input, self.input), self.code)
190    }
191}
192
193impl<I, T> ExtractContext<I, VerboseError<T>> for VerboseError<I>
194where
195    T: RecreateContext<I>,
196    I: Clone,
197{
198    fn extract_context(self, original_input: I) -> VerboseError<T> {
199        VerboseError {
200            errors: self
201                .errors
202                .into_iter()
203                .map(|(input, kind)| (T::recreate_context(original_input.clone(), input), kind))
204                .collect(),
205        }
206    }
207}
208
209/// `extract_context` can be used directly on `Result` values that have an
210/// error with `ExtractContext`
211impl<T, E1, E2, I> ExtractContext<I, Result<T, E2>> for Result<T, E1>
212where
213    E1: ExtractContext<I, E2>,
214{
215    fn extract_context(self, original_input: I) -> Result<T, E2> {
216        self.map_err(move |err| err.extract_context(original_input))
217    }
218}
219
220/// Bootstrapping layer for a nom parser.
221///
222/// This function is intended to be the entry point into a nom parser; it
223/// represents in some sense the "end of composability". It creates a function
224/// which applies a parser to an input. The parser is configured such that it
225/// must parse the *entire* input, and any "Incomplete" responses are reported
226/// as errors. Additionally, if the parser returns an error, the context
227/// information in the error is recombined with the original input via
228/// `ExtractContext` to create a more useful error.
229pub fn final_parser<I, O, E, E2>(parser: impl Parser<I, O, E>) -> impl FnMut(I) -> Result<O, E2>
230where
231    E: ParseError<I> + ExtractContext<I, E2>,
232    I: InputLength + Clone,
233{
234    let mut parser = parser.complete().all_consuming();
235
236    move |input| match parser.parse(input.clone()) {
237        Ok((_, parsed)) => Ok(parsed),
238        Err(NomErr::Error(err)) | Err(NomErr::Failure(err)) => Err(err.extract_context(input)),
239        Err(NomErr::Incomplete(..)) => {
240            unreachable!("Complete combinator should make this impossible")
241        }
242    }
243}
nom_supreme/final_parser.rs

nom_supreme/
final_parser.rs