nom_supreme/final_parser.rs
1//! Entry point layer into nom parsers. See [`final_parser`] for details.
2
3use core::fmt::{self, Display, Formatter};
4
5use nom::{
6 error::{Error, ErrorKind, ParseError},
7 Err as NomErr, InputLength, Offset, Parser,
8};
9
10use nom::error::VerboseError;
11
12use crate::parser_ext::ParserExt;
13
/// A type describing the absolute position of a parse error within an input.
///
/// Implementors are "full location context" types: values that pin down
/// exactly where in the original input an error happened (possibly along with
/// other related context) so that it can be reported. `nom-supreme` ships two
/// of them: [`Location`], which holds the line and column number of the
/// error, and [`ByteOffset`], which holds its byte offset.
///
/// This trait usually works together with [`ExtractContext`], which nom error
/// types implement to turn the input tails they store into location
/// contexts. [`final_parser`] in turn relies on [`ExtractContext`] to build
/// and return parse errors carrying the full attached context.
pub trait RecreateContext<I>
where
    Self: Sized,
{
    /// Rebuild a context from the *original* input and the `tail` reported by
    /// nom, describing where in the original input the error occurred.
    ///
    /// The result may be garbage, or the call may panic, when `tail` is not
    /// associated with `original_input`.
    fn recreate_context(original_input: I, tail: I) -> Self;
}
36
37impl<I> RecreateContext<I> for I {
38 fn recreate_context(_original_input: I, tail: I) -> Self {
39 tail
40 }
41}
42
/// The byte offset within the input at which an error may have occurred.
///
/// # `&str` Example
///
/// ```
/// use nom_supreme::final_parser::{ByteOffset, RecreateContext};
///
/// let original_input = "Hello,\nWorld!\n";
/// let tail = &original_input[9..];
///
/// assert_eq!(
///     ByteOffset::recreate_context(original_input, tail),
///     ByteOffset(9),
/// );
/// ```
///
/// # `&[u8]` Example
///
/// ```
/// use nom_supreme::final_parser::{ByteOffset, RecreateContext};
///
/// let original_input = b"GET / HTTP/1.1\r\n";
/// let tail = &original_input[6..];
///
/// assert_eq!(
///     ByteOffset::recreate_context(&original_input[..], tail),
///     ByteOffset(6),
/// );
/// ```
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ByteOffset(pub usize);
74
75impl<I: Offset> RecreateContext<I> for ByteOffset {
76 fn recreate_context(original_input: I, tail: I) -> Self {
77 ByteOffset(original_input.offset(&tail))
78 }
79}
80
/// A position in a string at which an error may have occurred. Following the
/// usual convention of editors and similar tools, both the line and the
/// column are 1-indexed.
///
/// # Example
///
/// ```
/// use nom_supreme::final_parser::{RecreateContext, Location};
///
/// let original_input = "Hello,\nWorld!\n";
/// let tail = &original_input[9..];
///
/// assert_eq!(
///     Location::recreate_context(original_input, tail),
///     Location { line: 2, column: 3 },
/// );
/// ```
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Location {
    /// Line number within the original input at which the error occurred
    pub line: usize,

    /// Column within that line of the original input at which the error
    /// occurred
    pub column: usize,
}
106
107impl Location {
108 /// Given the *original* input string, as well as the context reported by
109 /// nom, compute the location in the original string where the error
110 /// occurred.
111 ///
112 /// This function will report garbage (and may panic) if the context is not
113 /// associated with the input
114 pub fn locate_tail<'a>(original_input: &'a str, tail: &'a str) -> Self {
115 let offset = original_input.offset(tail);
116 let input_bytes = original_input.as_bytes();
117 let prefix = &input_bytes[..offset];
118
119 let line_number = memchr::memchr_iter(b'\n', prefix).count() + 1;
120
121 let last_line_start = memchr::memrchr(b'\n', prefix).map(|i| i + 1).unwrap_or(0);
122 let last_line = &prefix[last_line_start..];
123 let column_number = last_line.len() + 1;
124
125 Location {
126 line: line_number,
127 column: column_number,
128 }
129 }
130}
131
132impl Display for Location {
133 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
134 if f.alternate() {
135 write!(f, "line {}, column {}", self.line, self.column)
136 } else {
137 write!(f, "{}:{}", self.line, self.column)
138 }
139 }
140}
141
142impl RecreateContext<&str> for Location {
143 fn recreate_context(original_input: &str, tail: &str) -> Self {
144 Location::locate_tail(original_input, tail)
145 }
146}
147
/// Trait for recombining error information with the original input.
///
/// nom errors carry context information, specifically the tail of the input
/// that marks where the error happened. This trait takes that context and
/// recombines it with the *original* input, producing an error that holds
/// something more useful for error reporting.
///
/// An implementation typically looks like `ExtractContext<I, E<T>> for E<I>`:
/// some error type `E`, generic over the input type, can be converted into
/// another variant of that same error which uses `T` instead of `I` to hold
/// the resulting context. Often this context conversion can be done with
/// [`RecreateContext`].
pub trait ExtractContext<I, T> {
    /// Given the context attached to a nom error (`self`) and the *original*
    /// input to the nom parser, extract the more useful context information.
    ///
    /// For a string, for example, one possible context extraction is the
    /// Location (line and column number) in the original input where the
    /// error indicated by `self` occurred.
    fn extract_context(self, original_input: I) -> T;
}
169
170impl<I, T> ExtractContext<I, ()> for T {
171 fn extract_context(self, _original_input: I) {}
172}
173
174impl<I, T> ExtractContext<I, (T, ErrorKind)> for (I, ErrorKind)
175where
176 T: RecreateContext<I>,
177{
178 fn extract_context(self, original_input: I) -> (T, ErrorKind) {
179 let (tail, kind) = self;
180 (T::recreate_context(original_input, tail), kind)
181 }
182}
183
184impl<I, T> ExtractContext<I, Error<T>> for Error<I>
185where
186 T: RecreateContext<I>,
187{
188 fn extract_context(self, original_input: I) -> Error<T> {
189 Error::new(T::recreate_context(original_input, self.input), self.code)
190 }
191}
192
193impl<I, T> ExtractContext<I, VerboseError<T>> for VerboseError<I>
194where
195 T: RecreateContext<I>,
196 I: Clone,
197{
198 fn extract_context(self, original_input: I) -> VerboseError<T> {
199 VerboseError {
200 errors: self
201 .errors
202 .into_iter()
203 .map(|(input, kind)| (T::recreate_context(original_input.clone(), input), kind))
204 .collect(),
205 }
206 }
207}
208
209/// `extract_context` can be used directly on `Result` values that have an
210/// error with `ExtractContext`
211impl<T, E1, E2, I> ExtractContext<I, Result<T, E2>> for Result<T, E1>
212where
213 E1: ExtractContext<I, E2>,
214{
215 fn extract_context(self, original_input: I) -> Result<T, E2> {
216 self.map_err(move |err| err.extract_context(original_input))
217 }
218}
219
220/// Bootstrapping layer for a nom parser.
221///
222/// This function is intended to be the entry point into a nom parser; it
223/// represents in some sense the "end of composability". It creates a function
224/// which applies a parser to an input. The parser is configured such that it
225/// must parse the *entire* input, and any "Incomplete" responses are reported
226/// as errors. Additionally, if the parser returns an error, the context
227/// information in the error is recombined with the original input via
228/// `ExtractContext` to create a more useful error.
229pub fn final_parser<I, O, E, E2>(parser: impl Parser<I, O, E>) -> impl FnMut(I) -> Result<O, E2>
230where
231 E: ParseError<I> + ExtractContext<I, E2>,
232 I: InputLength + Clone,
233{
234 let mut parser = parser.complete().all_consuming();
235
236 move |input| match parser.parse(input.clone()) {
237 Ok((_, parsed)) => Ok(parsed),
238 Err(NomErr::Error(err)) | Err(NomErr::Failure(err)) => Err(err.extract_context(input)),
239 Err(NomErr::Incomplete(..)) => {
240 unreachable!("Complete combinator should make this impossible")
241 }
242 }
243}