Skip to main content

rsonpath_syntax/error/
display.rs

1//! Logic for pretty-printing syntax errors.
2//! This includes displaying the error and underline, fix suggestions, and other user-comfort notes.
3//! Managing the style of displayed messages: coloring, emphasis, etc. - is done by the [`style`]
4//! submodule, while this submodule deals with generating all the underlines, suggestions, notes,
5//! and printing it to screen with an injected style.
6use super::{formatter, style};
7use crate::error::{InnerParseError, ParseError, SyntaxErrorKind};
8use crate::str::EscapeMode;
9use formatter::SyntaxErrorLine;
10#[cfg(feature = "color")]
11use std::error::Error;
12use std::fmt;
13use std::fmt::Display;
14
15// Resolve the default style depending on whether the optional color dependencies are available.
16use crate::JSONPATH_WHITESPACE_BYTES;
17#[cfg(feature = "color")]
18use style::colored::OwoColorsErrorStyle as ErrorStyleImpl;
19#[cfg(not(feature = "color"))]
20use style::plain::PlainErrorStyle as ErrorStyleImpl;
21
/// Controls the default width of tabulation for calculating the width of underlines.
/// It seems impossible to know how wide tabs will be displayed on the end user's device during construction,
/// so we default to 4. This hopefully shouldn't be too annoying: if you're unironically using tabs to format
/// a JSONPath query you're asking for trouble yourself.
const TAB_DISPLAY_WIDTH: usize = 4;
/// Controls the maximum allowed width of a displayed line, including the underlined error and the context to the
/// left and right. It has to be limited to _some_ constant, as otherwise every individual error in a single long
/// line would cause the entire line to be written.
pub(super) const MAX_ERROR_LINE_WIDTH: usize = 100;
/// Controls the minimum allowed width of the original query to be displayed to the left and right of the underlined
/// error. If the error part is excessively long, the truncation could remove all the context. With this we force
/// it to print at least a few characters.
pub(super) const MIN_CONTEXT_WIDTH: usize = 5;
35
36/// Allows querying for display width of a character.
37pub(super) trait UnicodeWidth {
38    /// Width of a character to consider for calculating underline offsets and lengths.
39    fn width(&self) -> usize;
40}
41
42impl UnicodeWidth for char {
43    /// Width of a character to consider for calculating underline offsets and lengths.
44    ///
45    /// This is the Unicode width of the character, except for `\t`: it has a Unicode width of 1, which is dumb.
46    /// We use [`TAB_DISPLAY_WIDTH`] to control it instead.
47    fn width(&self) -> usize {
48        // Display tabs with a fixed width.
49        // How this looks depends on the user's terminal settings, but we use a reasonable default.
50        match self {
51            '\t' => TAB_DISPLAY_WIDTH,
52            _ => unicode_width::UnicodeWidthChar::width(*self).unwrap_or(0),
53        }
54    }
55}
56
/// Get an empty, non-colored style. This should be used when the error is displayed not on-demand
/// by the end application, but via the default [`Display`] of the error (e.g. during a panic).
///
/// The concrete type behind `ErrorStyleImpl` is selected at compile time by the `color` feature;
/// either way this simply forwards to that type's `empty()` constructor.
pub(super) fn empty_style() -> ErrorStyleImpl {
    ErrorStyleImpl::empty()
}
62
#[cfg(feature = "color")]
impl ParseError {
    /// Turn the error into a version with colored display.
    ///
    /// Consumes the error and wraps it in `style::colored::ColoredParseError`, which is exposed
    /// to the caller only as an opaque [`Error`] implementation. Only available with the `color`
    /// feature enabled.
    #[inline(always)]
    #[must_use]
    #[cfg_attr(docsrs, doc(cfg(feature = "color")))]
    pub fn colored(self) -> impl Error {
        style::colored::ColoredParseError(self)
    }
}
73
74impl SyntaxErrorKind {
75    /// Defines the main error message displayed to the user as the first line of the error.
76    ///
77    /// All must start with lowercase, not end with a period, and ideally be a single short sentence.
78    /// It should avoid including any contextful information, for example a number parsing error should
79    /// not include the input that failed in the message – instead, the error will be underlined
80    /// and the user should see the relevant [`underline_message`] next to it.
81    #[inline]
82    fn toplevel_message(&self) -> String {
83        match self {
84            Self::DisallowedLeadingWhitespace => "query starting with whitespace".to_string(),
85            Self::DisallowedTrailingWhitespace => "query ending with whitespace".to_string(),
86            Self::InvalidUnescapedCharacter => "invalid unescaped control character".to_string(),
87            Self::InvalidEscapeSequence => "invalid escape sequence".to_string(),
88            Self::UnpairedHighSurrogate => "invalid unicode escape sequence - unpaired high surrogate".to_string(),
89            Self::UnpairedLowSurrogate => "invalid unicode escape sequence - unpaired low surrogate".to_string(),
90            Self::InvalidHexDigitInUnicodeEscape => "invalid unicode escape sequence - invalid hex digit".to_string(),
91            Self::MissingClosingDoubleQuote => "double-quoted name selector is not closed".to_string(),
92            Self::MissingClosingSingleQuote => "single-quoted name selector is not closed".to_string(),
93            Self::MissingRootIdentifier => "query not starting with the root identifier '$'".to_string(),
94            Self::InvalidSegmentStart => "invalid segment syntax".to_string(),
95            Self::InvalidSegmentAfterTwoPeriods => "invalid descendant segment syntax".to_string(),
96            Self::InvalidNameShorthandAfterOnePeriod => "invalid short member name syntax".to_string(),
97            Self::InvalidSelector => "invalid selector syntax".to_string(),
98            Self::EmptySelector => "invalid selector - empty".to_string(),
99            Self::MissingSelectorSeparator => "selectors not separated with commas".to_string(),
100            Self::MissingClosingBracket => "bracketed selection is not closed".to_string(),
101            Self::NegativeZeroInteger => "negative zero used as an integer".to_string(),
102            Self::LeadingZeros => "integer with leading zeros".to_string(),
103            Self::IndexParseError(_) => "invalid index value".to_string(),
104            Self::SliceStartParseError(_) => "invalid slice start".to_string(),
105            Self::SliceEndParseError(_) => "invalid slice end".to_string(),
106            Self::SliceStepParseError(_) => "invalid slice step value".to_string(),
107            Self::NumberParseError(_) => "invalid number format".to_string(),
108            Self::MissingClosingParenthesis => "missing closing parenthesis in filter expression".to_string(),
109            Self::InvalidNegation => "invalid use of logical negation".to_string(),
110            Self::MissingComparisonOperator => "missing comparison operator".to_string(),
111            Self::InvalidComparisonOperator => "invalid comparison operator".to_string(),
112            Self::InvalidComparable => "invalid right-hand side of comparison".to_string(),
113            Self::NonSingularQueryInComparison => "non-singular query used in comparison".to_string(),
114            Self::InvalidFilter => "invalid filter expression syntax".to_string(),
115        }
116    }
117
118    /// Defines the error message displayed to the user right underneath the highlighted invalid
119    /// portion of the query string.
120    ///
121    /// All must start with lowercase, not end with a period, and ideally be a single short sentence.
122    #[inline]
123    fn underline_message(&self) -> String {
124        match self {
125            Self::DisallowedLeadingWhitespace => "leading whitespace is disallowed".to_string(),
126            Self::DisallowedTrailingWhitespace => "trailing whitespace is disallowed".to_string(),
127            Self::InvalidUnescapedCharacter => "this character must be escaped".to_string(),
128            Self::InvalidEscapeSequence => "not a valid escape sequence".to_string(),
129            Self::UnpairedHighSurrogate => "this high surrogate is unpaired".to_string(),
130            Self::UnpairedLowSurrogate => "this low surrogate is unpaired".to_string(),
131            Self::InvalidHexDigitInUnicodeEscape => "not a hex digit".to_string(),
132            Self::MissingClosingDoubleQuote => "expected a double quote '\"'".to_string(),
133            Self::MissingClosingSingleQuote => "expected a single quote `'`".to_string(),
134            Self::MissingRootIdentifier => "the '$' character missing before here".to_string(),
135            Self::InvalidSegmentStart => "not a valid segment syntax".to_string(),
136            Self::InvalidSegmentAfterTwoPeriods => "not a valid descendant segment syntax".to_string(),
137            Self::InvalidNameShorthandAfterOnePeriod => "not a valid name shorthand".to_string(),
138            Self::InvalidSelector => "not a valid selector".to_string(),
139            Self::EmptySelector => "expected a selector here, but found nothing".to_string(),
140            Self::MissingSelectorSeparator => "expected a comma separator before this character".to_string(),
141            Self::MissingClosingBracket => "expected a closing bracket ']'".to_string(),
142            Self::NegativeZeroInteger => "negative zero is not allowed".to_string(),
143            Self::LeadingZeros => "leading zeros are not allowed".to_string(),
144            Self::IndexParseError(inner) => format!("this index value is invalid; {inner}"),
145            Self::SliceStartParseError(inner) => format!("this start index is invalid; {inner}"),
146            Self::SliceEndParseError(inner) => format!("this end index is invalid; {inner}"),
147            Self::SliceStepParseError(inner) => format!("this step value is invalid; {inner}"),
148            Self::NumberParseError(inner) => format!("this number is invalid; {inner}"),
149            Self::MissingClosingParenthesis => "expected a closing parenthesis `(`".to_string(),
150            Self::InvalidNegation => "this negation is ambiguous".to_string(),
151            Self::InvalidComparable => "expected a literal or a filter query here".to_string(),
152            Self::NonSingularQueryInComparison => "this query is not singular".to_string(),
153            Self::MissingComparisonOperator => "expected a comparison operator here".to_string(),
154            Self::InvalidComparisonOperator => "not a valid comparison operator".to_string(),
155            Self::InvalidFilter => "not a valid filter expression".to_string(),
156        }
157    }
158}
159impl super::SyntaxError {
160    /// This creates friendly displayable errors.
161    ///
162    /// An error consists of
163    /// - The toplevel error name/message.
164    /// - A list of lines of the input, each with an optional underline message.
165    /// - A list of notes/suggestions at the end.
166    ///
167    /// Every error displays the entire error as well as some context before and after the error.
168    /// These are called the _pre-context_ and _post-context_, respectively. Ideally, we display the entire
169    /// line with the error. However, if the line is very long it would kill performance if many separate errors
170    /// were to print all of it to the output. Instead, we use the [`DisplayableSyntaxErrorBuilder`] to maintain
171    /// a manageable pre- and post-context (controlled by [`MAX_ERROR_LINE_WIDTH`]).
172    ///
173    /// Controlling the width requires computing byte index offsets and widths of all characters. To avoid quadratic
174    /// blowup, we compute this information once for the input via [`IndexedInput`](indexed_input::IndexedInput)
175    /// and use it in every [`display`] invocation.
176    fn display(
177        &self,
178        input: &formatter::ErrorFormatter,
179        suggestion: &mut Suggestion,
180        style: ErrorStyleImpl,
181    ) -> DisplayableSyntaxError {
182        let start_idx = input.len() - self.rev_idx;
183        let end_idx = start_idx + self.len - 1;
184
185        let lines = input.build_error_lines(
186            start_idx,
187            end_idx,
188            MIN_CONTEXT_WIDTH,
189            MAX_ERROR_LINE_WIDTH,
190            self.kind.underline_message(),
191        );
192        let notes = self.generate_notes(suggestion, input.str());
193
194        DisplayableSyntaxError {
195            toplevel_message: self.kind.toplevel_message(),
196            start_idx,
197            end_idx,
198            lines,
199            notes,
200            is_multiline: input.is_multiline(),
201            style,
202        }
203    }
204
205    /// Add suggestions and notes to the error message based on the error kind.
206    fn generate_notes(&self, suggestion: &mut Suggestion, input: &str) -> Vec<SyntaxErrorNote> {
207        // Figure out the first and last byte of the highlighted error. Errors always respect UTF-8 boundaries.
208        let start_idx = input.len() - self.rev_idx;
209        let end_idx = start_idx + self.len - 1;
210        let (prefix, error, suffix) = self.split_error(input);
211        // Kind-specific notes and suggestion building.
212        let mut notes = vec![];
213        match self.kind {
214            SyntaxErrorKind::DisallowedLeadingWhitespace | SyntaxErrorKind::DisallowedTrailingWhitespace => {
215                // Suggestion is to just remove the whitespace.
216                suggestion.remove(start_idx, error.len());
217            }
218            SyntaxErrorKind::InvalidUnescapedCharacter => {
219                // Escaping is context-sensitive (depends on surrounding quotes) for single and double quotes.
220                // For everything else we can use the existing machinery and pass an arbitrary EscapeMode.
221                if error == "\"" {
222                    suggestion.replace(start_idx, 1, r#"\""#);
223                } else if error == "'" {
224                    suggestion.replace(start_idx, 1, r"\'");
225                } else {
226                    let escaped = crate::str::escape(error, EscapeMode::DoubleQuoted);
227                    suggestion.replace(start_idx, error.len(), escaped);
228                }
229            }
230            SyntaxErrorKind::InvalidEscapeSequence => {
231                if error == r"\U" && suffix.len() >= 4 && suffix[..4].chars().all(|x| x.is_ascii_hexdigit()) {
232                    // The user probably tried to use a Unicode escape but is unaware the `u` is case-sensitive.
233                    notes.push("unicode escape sequences must use a lowercase 'u'".into());
234                    suggestion.replace(start_idx, 2, r"\u");
235                } else if error == r#"\""# {
236                    // We were in a string but escaping `"` was an error.
237                    // Thus, the string must be single-quote delimited and the double quote should be unescaped.
238                    notes.push("double quotes may only be escaped within double-quoted name selectors".into());
239                    suggestion.replace(start_idx, 2, r#"""#);
240                } else if error == r"\'" {
241                    // Analogous to above, but for single quotes in double-quote delimited strings.
242                    notes.push("single quotes may only be escaped within single-quoted name selectors".into());
243                    suggestion.replace(start_idx, 2, "'");
244                } else {
245                    // Try to suggest escaping the backslash. This might not be accurate, as the user might've tried to
246                    // use some unsupported escape sequence like \v. It might be useful to add some common escape
247                    // sequences not valid for JSONPath and suggest to replace them with the corresponding character
248                    // or full Unicode escape. This is "good enough" though, it's just a suggestion after all.
249                    notes.push(r#"the only valid escape sequences are \n, \r, \t, \f, \b, \\, \/, \' (in single quoted names), \" (in double quoted names), and \uXXXX where X are hex digits"#.into());
250                    notes.push(r"if you meant to match a literal backslash, you need to escape it with \\".into());
251                    suggestion.insert(start_idx, r"\");
252                }
253            }
254            SyntaxErrorKind::UnpairedHighSurrogate => {
255                notes.push(
256                    "a UTF-16 high surrogate has to be followed by a low surrogate to encode a valid Unicode character".into(),
257                );
258                notes.push("for more information about UTF-16 surrogate pairs see https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF".into());
259                // No way to guess what the user wanted here.
260                suggestion.invalidate();
261            }
262            SyntaxErrorKind::UnpairedLowSurrogate => {
263                notes.push(
264                    "a UTF-16 low surrogate has to be preceded by a high surrogate to encode a valid Unicode character".into(),
265                );
266                notes.push("for more information about UTF-16 surrogate pairs see https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF".into());
267                // No way to guess what the user wanted here.
268                suggestion.invalidate();
269            }
270            SyntaxErrorKind::InvalidHexDigitInUnicodeEscape => {
271                notes.push("valid hex digits are 0 through 9 and A through F (case-insensitive)".into());
272                // We can't possibly guess what the user got wrong here. Most likely they forgot one of the digits
273                // and the next character was picked up as a hex digit, but we can't resolve that.
274                suggestion.invalidate();
275            }
276            // These three are straightforward.
277            SyntaxErrorKind::MissingClosingSingleQuote => suggestion.insert(end_idx, "'"),
278            SyntaxErrorKind::MissingClosingDoubleQuote => suggestion.insert(end_idx, "\""),
279            SyntaxErrorKind::MissingRootIdentifier => suggestion.insert(start_idx, "$"),
280            SyntaxErrorKind::InvalidSegmentStart => {
281                notes.push("valid segments are: member name shorthands like `.name`/`..name`; or child/descendant bracketed selections like `[<segments>]`/`..[<segments>]`".into());
282                // We can't possibly guess what segment the user wanted here.
283                suggestion.invalidate();
284            }
285            SyntaxErrorKind::InvalidSegmentAfterTwoPeriods => {
286                if error.starts_with('.') {
287                    // The user probably put too many periods, try to trim to two.
288                    let nerror = error.trim_start_matches('.');
289                    let number_of_periods = error.len() - nerror.len();
290                    suggestion.remove(start_idx, number_of_periods);
291                } else {
292                    // Otherwise, who knows?! E.g. `$..5` might've been an attempt to use an index selector `$..[5]` or
293                    // a name selector for the string "5", i.e. `$..['5']`. Both suggestions seem equally plausible.
294                    suggestion.invalidate();
295                }
296                notes.push("valid segments are either member name shorthands `name`, or bracketed selections like `['name']` or `[42]`".into());
297            }
298            SyntaxErrorKind::InvalidNameShorthandAfterOnePeriod => {
299                // Detects using periods in conjunction with bracketed selectors - it's a very common mistake, so it's
300                // important to have good suggestions here!
301                if error.starts_with('[') && error.ends_with(']') {
302                    // This means someone input .[a] or .['a']. The suggestion is to first remove the period.
303                    suggestion.remove(start_idx - 1, 1);
304                    // Now, if someone input .[a] then the quotes are also missing. We do our best to figure out which
305                    // quotes could work and insert them. If this fails then we need to manually escape single quotes.
306                    let looks_valid = (error.starts_with("['") && error.ends_with("']"))
307                        || (error.starts_with("[\"") && error.ends_with("\"]"));
308                    if !looks_valid {
309                        fix_unquoted_bracketed_selector(suggestion, error.as_bytes(), start_idx);
310                    }
311                } else {
312                    // Otherwise it's not clear what to suggest. As in the descendant case above, a pattern like
313                    // `$.5` is ambiguous.
314                    suggestion.invalidate();
315                }
316            }
317            SyntaxErrorKind::MissingSelectorSeparator => {
318                // This is always resolvable by just adding the separator. We do that while respecting sensible
319                // whitespacing, i.e. `$['a' 'b']` becomes `$['a`, 'b']` and not `$['a' ,'b']`.
320                let prefix_whitespace_len = prefix.len() - prefix.trim_end_matches(' ').len();
321                suggestion.insert(start_idx - prefix_whitespace_len, ",");
322            }
323            // These two are straightforward.
324            SyntaxErrorKind::MissingClosingBracket => suggestion.insert(end_idx, "]"),
325            SyntaxErrorKind::MissingClosingParenthesis => suggestion.insert(end_idx, ")"),
326            // Also straightforward, just use a plain zero instead.
327            SyntaxErrorKind::NegativeZeroInteger => suggestion.replace(start_idx, error.len(), "0"),
328            SyntaxErrorKind::LeadingZeros => {
329                // Leading zeroes are always resolvable by simply removing the zeroes,
330                // but we need to take care to handle negative numbers correctly.
331                // The error highlights the entire integer with the minus when it fails, so detection is easy.
332                let is_negative = error.starts_with('-');
333                // We find the meaningful part of the number, ignoring the sign and all leading zeroes.
334                // This works because:
335                //  - we remember the sign and offset the removal index to preserve it if needed;
336                //  - the minus sign is always tightly attached to the number, i.e. inputting `- 01` is invalid
337                //    and would result in a different error altogether;
338                //  - we separately ensure we don't replace a zero (e.g. `00`) with nothing.
339                let replacement = error.trim_start_matches(['-', '0']);
340                let offset = if is_negative { 1 } else { 0 };
341
342                if replacement.is_empty() {
343                    // Special case where there is nothing left after the trim.
344                    // Note that a negative zero would be an error, so we forget if the sign was there and suggest
345                    // a "normal" zero.
346                    suggestion.replace(start_idx, error.len(), "0");
347                } else {
348                    // Remove the number of trimmed characters, perhaps except the minus sign.
349                    let remove_len = error.len() - replacement.len() - offset;
350                    suggestion.remove(start_idx + offset, remove_len);
351                }
352            }
353            SyntaxErrorKind::NonSingularQueryInComparison => {
354                notes.push("singular queries use only child segments with single name or index selectors".into());
355                // There is no way to fix it, this is simply unsupported by JSONPath.
356                suggestion.invalidate();
357            }
358            // This one is hard, as it's kind of a catch-all "user input is nonsense" error kind.
359            // However, there are some special cases that are useful to match against:
360            //  - a selector like `[a]` is invalid, but the user probably wanted to search for the key `a`, so we should
361            //    suggest inserting quotes;
362            //  - whitespace between sign and number is disallowed (e.g. `$[- 1]` is illegal), but the user probably
363            //    just wants the version without whitespace.
364            //
365            // If any other cases are reasonable and fall into this kind, suggestion generation should be added here.
366            SyntaxErrorKind::InvalidSelector => 'handler: {
367                let input_bytes = input.as_bytes();
368                // Handle the minus-with-whitespace case first.
369                if error.starts_with('-') {
370                    use std::str::FromStr as _;
371                    let white_space_len = error
372                        .as_bytes()
373                        .iter()
374                        .skip(1)
375                        .take_while(|c| JSONPATH_WHITESPACE_BYTES.contains(c))
376                        .count();
377                    // Make sure the suggestion makes sense, i.e. after removing the whitespace we are left with an actual number.
378                    // This requires us to also trim leading zeroes and handle the case when all digits were zero.
379                    let leading_zero_len = error
380                        .as_bytes()
381                        .iter()
382                        .skip(1 + white_space_len)
383                        .take_while(|c| **c == b'0')
384                        .count();
385                    if 1 + white_space_len + leading_zero_len == error.len() {
386                        // This was just a very elaborate negative zero.
387                        suggestion.replace(start_idx, error.len(), "0");
388                        break 'handler;
389                    }
390                    // Now make sure the rest is a sensible number. Slicing is allowed since we checked all characters
391                    // we skipped are just ASCII.
392                    let rest = &error[1 + white_space_len + leading_zero_len..];
393                    if crate::num::JsonNumber::from_str(rest).is_ok() {
394                        // We're okay, just remove all the nonsense.
395                        suggestion.remove(start_idx, 1 + white_space_len + leading_zero_len);
396                        break 'handler;
397                    }
398                    // Otherwise we can't handle this, but maybe something below will.
399                }
400
401                // Try to handle the case where we are delimited by (brackets or commas).
402                if start_idx == 0 || end_idx == input_bytes.len() {
403                    // The error is not delimited by anything.
404                    suggestion.invalidate();
405                } else {
406                    // We need to respect whitespace, so find the delimiters.
407                    let mut start_boundary = start_idx - 1;
408                    let mut end_boundary = end_idx + 1;
409                    while start_boundary > 0 && input_bytes[start_boundary].is_ascii_whitespace() {
410                        start_boundary -= 1;
411                    }
412                    while end_boundary < input.len() - 1 && input_bytes[end_boundary].is_ascii_whitespace() {
413                        end_boundary += 1;
414                    }
415
416                    // If it's brackets or commas then we can try to fix the selector.
417                    if [b'[', b','].contains(&input_bytes[start_boundary])
418                        && [b']', b','].contains(&input_bytes[end_boundary])
419                    {
420                        // The invalid selector is bracketed, so the user might've meant to search for the string inside
421                        // but forgot the quotes. Try to fix it if possible.
422                        fix_unquoted_bracketed_selector(
423                            suggestion,
424                            &input_bytes[start_idx - 1..=end_idx + 1],
425                            start_idx - 1,
426                        );
427                    } else {
428                        // Otherwise we can't do anything.
429                        suggestion.invalidate()
430                    }
431                }
432            }
433            SyntaxErrorKind::EmptySelector => {
434                // An empty selector like `$[]`. Maybe the user wants to select everything with no particular filter?
435                suggestion.insert(start_idx + 1, "*");
436                notes.push("if you meant to match any value, you should use the wildcard selector `*`".into());
437            }
438            SyntaxErrorKind::InvalidNegation => {
439                // This is an ambiguous logical negation. We cannot resolve it for the user since
440                // we don't know which version they meant, so we signal to disambiguate.
441                notes.push("add parenthesis around the expression you want to negate".into());
442            }
443            // These are number-parsing errors other than the JSONPath-specific leading-zero and negative-zero ones.
444            // Can't think of a good suggestion algorithm for those.
445            SyntaxErrorKind::IndexParseError(_)
446            | SyntaxErrorKind::SliceStartParseError(_)
447            | SyntaxErrorKind::SliceStepParseError(_)
448            | SyntaxErrorKind::SliceEndParseError(_)
449            | SyntaxErrorKind::NumberParseError(_)
450            // There might be some sensible cases here, but I can't think of any at the moment.
451            | SyntaxErrorKind::InvalidComparisonOperator
452            // We cannot possibly guess what operator the user meant.
453            | SyntaxErrorKind::MissingComparisonOperator
454            // There might be some useful cases here like with the InvalidSelector. Feel free to suggest.
455            | SyntaxErrorKind::InvalidFilter
456            | SyntaxErrorKind::InvalidComparable => suggestion.invalidate(),
457        }
458
459        // Generic notes.
460        if error.starts_with('$') {
461            notes.push("the root identifier '$' must appear exactly once at the start of the query".into());
462        }
463
464        return notes;
465
466        fn fix_unquoted_bracketed_selector(suggestion: &mut Suggestion, selector_bytes: &[u8], idx_offset: usize) {
467            // Try to fix a selector of the form `[somestr]` that is missing quotes.
468            // There are three possible way of fixing it - `['somestr']`, `["somestr"]`, and also sometimes simplifying
469            // to the shorthand selector `somestr`. We ignore the shorthand to simplify and try to suggest one of the
470            // canonical forms. We prefer single quotes over double quotes, unless `somestr` contains unescaped single
471            // quotes already. If `somestr` contains both kinds of quotes we will need to find all unescaped single
472            // quotes and escape them before inserting the delimiting ones.
473            let mut escaped = false;
474            let mut unescaped_single = false;
475            let mut unescaped_double = false;
476            for &b in selector_bytes.iter().skip(1).take(selector_bytes.len() - 1) {
477                if !escaped && b == b'\'' {
478                    unescaped_single = true;
479                }
480                if !escaped && b == b'"' {
481                    unescaped_double = true;
482                }
483                if b == b'\\' {
484                    escaped = !escaped;
485                } else {
486                    escaped = false;
487                }
488            }
489            if !unescaped_single {
490                suggestion.insert(idx_offset + 1, "'");
491                suggestion.insert(idx_offset + selector_bytes.len() - 1, "'");
492            } else if !unescaped_double {
493                suggestion.insert(idx_offset + 1, "\"");
494                suggestion.insert(idx_offset + selector_bytes.len() - 1, "\"");
495            } else {
496                // Go again and escape all unescaped quotes.
497                let mut escaped = false;
498                for (i, &b) in selector_bytes.iter().enumerate() {
499                    if !escaped && b == b'\'' {
500                        suggestion.insert(idx_offset + i, "\\");
501                    }
502                    if b == b'\\' {
503                        escaped = !escaped;
504                    } else {
505                        escaped = false;
506                    }
507                }
508                // Now inserting single quotes is valid.
509                suggestion.insert(idx_offset + 1, "'");
510                suggestion.insert(idx_offset + selector_bytes.len() - 1, "'");
511            }
512        }
513    }
514
515    /// Locate the error within the input and split it into three parts, (prefix, error, suffix).
516    fn split_error<'a>(&self, input: &'a str) -> (&'a str, &'a str, &'a str) {
517        let start = input.len() - self.rev_idx;
518        let (prefix, rest) = input.split_at(start);
519        let (error, suffix) = if self.len >= rest.len() {
520            (rest, "")
521        } else {
522            rest.split_at(self.len)
523        };
524        (prefix, error, suffix)
525    }
526}
527
528/// Format a [`ParseError`] into a [`Formatter`](fmt::Formatter) using the specified [`ErrorStyleImpl`].
529#[inline(always)]
530pub(super) fn fmt_parse_error(error: &ParseError, style: &ErrorStyleImpl, f: &mut fmt::Formatter<'_>) -> fmt::Result {
531    match &error.inner {
532        InnerParseError::Syntax(syntax_errors) => {
533            // We display all the errors separately and accumulate the fixes to show one suggestion at the end.
534            // First, index the input to avoid repeating work between consecutive errors.
535            let indexed_input = formatter::ErrorFormatter::new(&error.input);
536            let mut suggestion = Suggestion::new();
537            for syntax_error in syntax_errors {
538                writeln!(
539                    f,
540                    "{}",
541                    syntax_error.display(&indexed_input, &mut suggestion, style.clone())
542                )?;
543            }
544
545            if let Some(suggestion) = suggestion.apply(&error.input) {
546                writeln!(
547                    f,
548                    "{} did you mean `{}` ?",
549                    style.note_prefix(&"suggestion:"),
550                    style.suggestion(&suggestion)
551                )?;
552            }
553        }
554        InnerParseError::RecursionLimit(limit) => {
555            writeln!(
556                f,
557                "{} {}",
558                style.error_prefix(&"error:"),
559                style.error_message(&"nesting level exceeded")
560            )?;
561            writeln!(f)?;
562            writeln!(f, "  {}", error.input)?;
563            writeln!(
564                f,
565                "{} the parser limits nesting to {}; this applies to filter logical expressions",
566                style.note_prefix(&"note:"),
567                limit
568            )?;
569        }
570    }
571
572    Ok(())
573}
574
/// Syntax error that can be pretty-printed.
///
/// This is not a publicly accessible type and exists only as an intermediary between the actual [`ParseError`]
/// and the output for its display.
struct DisplayableSyntaxError {
    /// Message printed on the first line, right after the `error:` prefix.
    toplevel_message: String,
    /// First byte index shown in the position hint below the annotated lines.
    start_idx: usize,
    /// Last byte index shown in the position hint; when equal to `start_idx` a single
    /// `(byte N)` is printed instead of a `(bytes N-M)` range.
    end_idx: usize,
    /// Whether a line-number gutter is printed in front of every line and underline.
    is_multiline: bool,
    /// Lines of input to print, in order, each with its optional underline.
    lines: Vec<SyntaxErrorLine>,
    /// Notes printed after the position hint, each prefixed with `note:`.
    notes: Vec<SyntaxErrorNote>,
    /// Style applied to every decorated fragment of the output.
    style: ErrorStyleImpl,
}
588
/// A free-form remark attached to a displayed syntax error.
struct SyntaxErrorNote {
    /// Text of the note.
    message: String,
}

impl From<&str> for SyntaxErrorNote {
    /// Build a note by copying the given text into an owned message.
    #[inline]
    fn from(value: &str) -> Self {
        let message = String::from(value);
        Self { message }
    }
}
601
/// A proposed correction of the erroneous input, shown to the user.
///
/// A suggestion is either a list of diff operations that turn the input into a corrected one,
/// or the [`Invalid`](Suggestion::Invalid) state that disables the suggestion altogether,
/// since sometimes no sensible one can be made.
enum Suggestion {
    Valid(Vec<SuggestionDiff>),
    Invalid,
}

#[derive(Debug)]
enum SuggestionDiff {
    /// At a given byte index of the original input, insert the given string.
    Insert(usize, String),
    /// Starting at a given byte index of the original input, remove this many bytes.
    Remove(usize, usize),
    /// Starting at a given byte index of the original input, remove this many bytes
    /// and replace them with the given string.
    Replace(usize, usize, String),
}

impl SuggestionDiff {
    /// Byte index of the original input at which this diff takes effect.
    fn start_idx(&self) -> usize {
        match *self {
            Self::Insert(idx, _) => idx,
            Self::Remove(idx, _) => idx,
            Self::Replace(idx, _, _) => idx,
        }
    }
}

impl Suggestion {
    /// Create a valid suggestion with no diffs.
    fn new() -> Self {
        Self::Valid(Vec::new())
    }

    /// At a given byte index of the original input, insert the given string.
    fn insert<S: AsRef<str>>(&mut self, at: usize, str: S) {
        let text = str.as_ref().to_owned();
        self.push(SuggestionDiff::Insert(at, text))
    }

    /// Starting at a given byte index of the original input, remove this many bytes.
    fn remove(&mut self, at: usize, len: usize) {
        self.push(SuggestionDiff::Remove(at, len))
    }

    /// Starting at a given byte index of the original input, remove this many bytes
    /// and replace them with the given string.
    fn replace<S: AsRef<str>>(&mut self, at: usize, remove_len: usize, str: S) {
        let text = str.as_ref().to_owned();
        self.push(SuggestionDiff::Replace(at, remove_len, text))
    }

    /// Record a diff; an invalidated suggestion silently drops it.
    fn push(&mut self, diff: SuggestionDiff) {
        if let Self::Valid(diffs) = self {
            diffs.push(diff);
        }
    }

    /// Disable the suggestion, discarding all diffs recorded so far.
    fn invalidate(&mut self) {
        *self = Self::Invalid
    }

    /// Apply the suggestion to the given input (if possible and not [`Invalid`](Suggestion::Invalid)).
    ///
    /// Panics if a diff is left over that is not an insertion at the very end of the input.
    fn apply(self, input: &str) -> Option<String> {
        /// Consume characters for as long as they start before the byte index `end`.
        fn skip_before(chars: &mut std::iter::Peekable<std::str::CharIndices<'_>>, end: usize) {
            while chars.next_if(|&(idx, _)| idx < end).is_some() {}
        }

        let mut diffs = match self {
            Self::Valid(diffs) => diffs,
            Self::Invalid => return None,
        };

        // Process the diffs as a queue ordered by start index. The sort is stable, so diffs sharing
        // an index are applied in the order they were recorded. The input is walked char by char and
        // the front diff fires once its index is reached, which relies on the diffs being sensible
        // and respecting UTF-8 boundaries.
        diffs.sort_by_key(SuggestionDiff::start_idx);
        let mut pending = diffs.into_iter().peekable();
        let mut chars = input.char_indices().peekable();
        let mut result = String::new();

        while let Some(&(pos, ch)) = chars.peek() {
            match pending.next_if(|diff| diff.start_idx() == pos) {
                Some(SuggestionDiff::Insert(_, text)) => result.push_str(&text),
                Some(SuggestionDiff::Remove(_, len)) => skip_before(&mut chars, pos + len),
                Some(SuggestionDiff::Replace(_, len, text)) => {
                    result.push_str(&text);
                    skip_before(&mut chars, pos + len);
                }
                // No diff fires here, so the character is copied verbatim.
                None => {
                    result.push(ch);
                    chars.next();
                }
            }
        }

        // Whatever is left must be an insertion at the end of the input.
        // Verify that and apply them.
        for diff in pending {
            match diff {
                SuggestionDiff::Insert(at, text) if at == input.len() => result.push_str(&text),
                _ => panic!("invalid suggestion diff beyond bounds of input: {diff:?}"),
            }
        }

        Some(result)
    }
}
727
728// Actually display the error.
729// This is straightforward - all hard logic was performed above, now we just read the instructions and follow them
730// while applying the internal style.
731impl Display for DisplayableSyntaxError {
732    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
733        // Top-level error message.
734        writeln!(
735            f,
736            "{} {}",
737            self.style.error_prefix(&"error:"),
738            self.style.error_message(&self.toplevel_message)
739        )?;
740        writeln!(f)?;
741
742        // Annotated lines of input.
743        for line in &self.lines {
744            // Only print line numbers if required.
745            if self.is_multiline {
746                write!(
747                    f,
748                    " {: >3} {} ",
749                    self.style.line_numbers(&(line.line_number + 1)),
750                    self.style.line_numbers(&"|"),
751                )?;
752            } else {
753                write!(f, "  ")?;
754            }
755            if line.truncated_start {
756                write!(f, "{}", self.style.truncation_marks(&"(...) "))?;
757            }
758            write!(f, "{}", line.line)?;
759            if line.truncated_end {
760                write!(f, "{}", self.style.truncation_marks(&" (...)"))?;
761            }
762            if !line.line.ends_with('\n') {
763                writeln!(f)?;
764            }
765
766            // Print the underline if it exists in this line.
767            if let Some(underline) = &line.underline {
768                if underline.len > 0 {
769                    // If the input is multiline then we extend the vertical line to look nicer.
770                    if self.is_multiline {
771                        write!(f, "     {} ", self.style.line_numbers(&"|"))?;
772                    } else {
773                        write!(f, "  ")?;
774                    }
775
776                    for _ in 0..underline.start_pos {
777                        write!(f, " ")?;
778                    }
779                    if line.truncated_start {
780                        write!(f, "      ")?;
781                    }
782                    for _ in 0..underline.len {
783                        write!(f, "{}", self.style.error_underline(&"^"))?;
784                    }
785                    if let Some(msg) = &underline.message {
786                        writeln!(f, " {}", self.style.error_underline_message(msg))?;
787                    } else {
788                        writeln!(f)?;
789                    }
790                }
791            }
792        }
793
794        // If the input is multiline then we offset the bytes indices so that they visually start directly below
795        // the vertical bar. Purely aesthetical choice.
796        if self.is_multiline {
797            write!(f, "   ")?;
798        }
799        // Print the byte indices, differently if there's only one or if it's a range.
800        if self.start_idx == self.end_idx {
801            writeln!(
802                f,
803                "  {} {}{}",
804                self.style.error_position_hint(&"(byte"),
805                self.style.error_position_hint(&self.start_idx),
806                self.style.error_position_hint(&")")
807            )?;
808        } else {
809            writeln!(
810                f,
811                "  {} {}{}{}{}",
812                self.style.error_position_hint(&"(bytes"),
813                self.style.error_position_hint(&self.start_idx),
814                self.style.error_position_hint(&"-"),
815                self.style.error_position_hint(&self.end_idx),
816                self.style.error_position_hint(&")")
817            )?;
818        }
819
820        writeln!(f)?;
821
822        // Print all the notes at the end.
823        if !self.notes.is_empty() {
824            // Track if it's the first line to avoid a trailing newline.
825            let mut first = true;
826            for note in &self.notes {
827                if !first {
828                    writeln!(f)?;
829                }
830                write!(f, "{} {note}", self.style.note_prefix(&"note:"))?;
831                first = false;
832            }
833        }
834
835        Ok(())
836    }
837}
838
839impl Display for SyntaxErrorNote {
840    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
841        write!(f, "{}", self.message)
842    }
843}
844
#[cfg(test)]
mod tests {
    use super::*;

    /// Applies one diff of every kind to a hand-written query and checks the combined result.
    #[test]
    fn artificial_suggestion_test() {
        let query = "$..['abc' 'def']....abc..['\n']";
        let expected = "$..['abc', 'def']..abc..['\\n']";

        let mut fixes = Suggestion::new();
        // Separate the two names with a comma, placed before the space at byte 9.
        fixes.insert(9, ",");
        // Drop two of the four consecutive dots.
        fixes.remove(18, 2);
        // Swap the raw newline at byte 27 for its escape sequence.
        fixes.replace(27, 1, "\\n");

        assert_eq!(fixes.apply(query).as_deref(), Some(expected));
    }
}