1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
//! Entry point layer into nom parsers. See [`final_parser`] for details.
use std::fmt::{self, Display, Formatter};
use nom::{
error::{Error, ErrorKind, ParseError, VerboseError},
Err as NomErr, InputLength, Offset, Parser,
};
use crate::parser_ext::ParserExt;
/// Trait for types which represent an absolute location of a parse error in
/// an input.
///
/// This trait is implemented for "full location context" types; that is, for
/// types that capture the exact location (and possibly other related context)
/// of a parse error for the purpose of reporting. Two such types are provided
/// by `nom-supreme`: [`Location`], which captures the line and column number
/// of the error, and [`ByteOffset`], which captures the byte offset of the
/// error.
///
/// This trait is usually paired with [`ExtractContext`], a trait used by
/// nom error types to convert the stored input tail types into location
/// contexts. [`ExtractContext`] is in turn used by [`final_parser`] to create
/// and return parse errors with the full attached context.
pub trait RecreateContext<I>: Sized {
/// Given the *original* input, as well as the context reported by nom,
/// recreate a context in the original string where the error occurred.
///
/// `original_input` is the input the parse started from, and `tail` is the
/// input value that nom attached to the error. Both have the same type `I`.
///
/// This function may return garbage or panic if the tail is not associated
/// with the original input.
fn recreate_context(original_input: I, tail: I) -> Self;
}
/// Identity context: the input type acts as its own context, so the tail
/// reported by nom is handed back unchanged and the original input is ignored.
impl<I> RecreateContext<I> for I {
    fn recreate_context(_original: I, tail: I) -> Self {
        tail
    }
}
/// A byte offset into the input where an error may have occurred
///
/// The wrapped `usize` is the offset itself; it is public and can be read
/// directly with `.0`.
///
/// # `&str` Example
///
/// ```
/// use nom_supreme::final_parser::{ByteOffset, RecreateContext};
///
/// let original_input = "Hello,\nWorld!\n";
/// let tail = &original_input[9..];
///
/// assert_eq!(
///     ByteOffset::recreate_context(original_input, tail),
///     ByteOffset(9),
/// );
/// ```
///
/// # `&[u8]` Example
///
/// ```
/// use nom_supreme::final_parser::{ByteOffset, RecreateContext};
///
/// let original_input = b"GET / HTTP/1.1\r\n";
/// let tail = &original_input[6..];
///
/// assert_eq!(
///     ByteOffset::recreate_context(&original_input[..], tail),
///     ByteOffset(6),
/// );
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ByteOffset(pub usize);
/// The context is whatever [`Offset::offset`] reports for `tail` relative to
/// `original_input`, wrapped in a [`ByteOffset`].
impl<I> RecreateContext<I> for ByteOffset
where
    I: Offset,
{
    fn recreate_context(original_input: I, tail: I) -> Self {
        let distance = original_input.offset(&tail);
        Self(distance)
    }
}
/// A location in a string where an error may have occurred. In keeping with
/// the typical practice from editors and other tools, lines and columns are
/// both 1-indexed.
///
/// When a `Location` is computed by [`Location::locate_tail`], the column is
/// counted in bytes, not characters, so a multi-byte UTF-8 character earlier
/// on the line advances the column by more than one.
///
/// # Example
///
/// ```
/// use nom_supreme::final_parser::{RecreateContext, Location};
///
/// let original_input = "Hello,\nWorld!\n";
/// let tail = &original_input[9..];
///
/// assert_eq!(
///     Location::recreate_context(original_input, tail),
///     Location { line: 2, column: 3 },
/// );
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Location {
/// The 1-indexed line number in the original input where the error occurred
pub line: usize,
/// The 1-indexed column of the line in the original input where the error
/// occurred
pub column: usize,
}
impl Location {
    /// Compute the 1-indexed line and column at which `tail` begins inside
    /// `original_input`.
    ///
    /// The line is one more than the number of `\n` bytes that precede the
    /// tail. The column is one more than the number of bytes between the tail
    /// and the closest preceding `\n` (or the start of the input when there is
    /// none). Columns therefore count bytes, not characters.
    ///
    /// This function will report garbage (and may panic) if `tail` is not
    /// associated with `original_input`.
    pub fn locate_tail<'a>(original_input: &'a str, tail: &'a str) -> Self {
        // Everything in the original input that comes before the tail.
        let consumed = &original_input.as_bytes()[..original_input.offset(tail)];

        // Number of bytes on the tail's own line that precede the tail.
        let bytes_into_line = match memchr::memrchr(b'\n', consumed) {
            Some(newline_at) => consumed.len() - (newline_at + 1),
            None => consumed.len(),
        };

        Self {
            line: 1 + memchr::memchr_iter(b'\n', consumed).count(),
            column: 1 + bytes_into_line,
        }
    }
}
/// Renders as `line:column` by default, or as `line L, column C` when the
/// alternate flag (`{:#}`) is requested.
impl Display for Location {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let Self { line, column } = self;

        // Compact form is the default; the verbose form is opt-in via `#`.
        if !f.alternate() {
            return write!(f, "{}:{}", line, column);
        }
        write!(f, "line {}, column {}", line, column)
    }
}
/// For string inputs, the context is the line/column pair computed by
/// [`Location::locate_tail`].
impl RecreateContext<&str> for Location {
    fn recreate_context(original_input: &str, tail: &str) -> Self {
        Self::locate_tail(original_input, tail)
    }
}
/// Trait for recombining error information with the original input.
///
/// This trait is used to take the context information attached to nom errors
/// (specifically, the tail of the input indicating where the error occurred)
/// and recombine it with the *original* input to produce an error with
/// something more useful for error reporting.
///
/// Typically, it looks like `ExtractContext<I, E<T>> for E<I>`. This
/// indicates that some error type `E`, which is generic over the input type,
/// can be converted into another variant of that error, using `T` instead of
/// `I` to hold the result context. Often this context conversion can happen
/// with [`RecreateContext`].
pub trait ExtractContext<I, T> {
/// Given the context attached to a nom error, and given the *original*
/// input to the nom parser, extract the more useful context information.
///
/// For example, for a string, one possible context extraction would be the
/// Location (line and column number) in the original input where the error
/// indicated by `self` occurred.
///
/// Consumes `self` and `original_input` and returns the converted value.
fn extract_context(self, original_input: I) -> T;
}
/// Any value can be "extracted" to the unit context: both the value and the
/// original input are simply discarded.
impl<I, T> ExtractContext<I, ()> for T {
    fn extract_context(self, _: I) {}
}
/// Basic `(tail, kind)` error tuples: the tail is converted through
/// [`RecreateContext`] and the [`ErrorKind`] is carried over unchanged.
impl<I, T> ExtractContext<I, (T, ErrorKind)> for (I, ErrorKind)
where
    T: RecreateContext<I>,
{
    fn extract_context(self, original_input: I) -> (T, ErrorKind) {
        (T::recreate_context(original_input, self.0), self.1)
    }
}
impl<I, T> ExtractContext<I, Error<T>> for Error<I>
where
T: RecreateContext<I>,
{
fn extract_context(self, original_input: I) -> Error<T> {
Error::new(T::recreate_context(original_input, self.input), self.code)
}
}
/// nom's [`VerboseError`]: every recorded `(input, kind)` entry has its input
/// converted through [`RecreateContext`], in order, while its kind is carried
/// over unchanged.
impl<I, T> ExtractContext<I, VerboseError<T>> for VerboseError<I>
where
    T: RecreateContext<I>,
    I: Clone,
{
    fn extract_context(self, original_input: I) -> VerboseError<T> {
        let mut errors = Vec::with_capacity(self.errors.len());

        for (tail, kind) in self.errors {
            // `recreate_context` takes the original by value, so each entry
            // gets its own clone of it.
            let context = T::recreate_context(original_input.clone(), tail);
            errors.push((context, kind));
        }

        VerboseError { errors }
    }
}
/// `extract_context` can be called directly on a `Result` whose error type
/// implements `ExtractContext`: an `Ok` value passes through unchanged, and
/// an `Err` has its payload converted.
impl<T, E1, E2, I> ExtractContext<I, Result<T, E2>> for Result<T, E1>
where
    E1: ExtractContext<I, E2>,
{
    fn extract_context(self, original_input: I) -> Result<T, E2> {
        match self {
            Ok(value) => Ok(value),
            Err(err) => Err(err.extract_context(original_input)),
        }
    }
}
/// Bootstrapping layer for a nom parser.
///
/// This function is intended to be the entry point into a nom parser; it
/// represents in some sense the "end of composability". It creates a function
/// which applies a parser to an input. The parser is configured such that it
/// must parse the *entire* input, and any "Incomplete" responses are reported
/// as errors. Additionally, if the parser returns an error, the context
/// information in the error is recombined with the original input via
/// `ExtractContext` to create a more useful error.
pub fn final_parser<I, O, E, E2>(parser: impl Parser<I, O, E>) -> impl FnMut(I) -> Result<O, E2>
where
E: ParseError<I> + ExtractContext<I, E2>,
I: InputLength + Clone,
{
let mut parser = parser.complete().all_consuming();
move |input| match parser.parse(input.clone()) {
Ok((_, parsed)) => Ok(parsed),
Err(NomErr::Error(err)) | Err(NomErr::Failure(err)) => Err(err.extract_context(input)),
Err(NomErr::Incomplete(..)) => {
unreachable!("Complete combinator should make this impossible")
}
}
}