// reddb_rql/parser/error.rs

1//! Parser error types
2
3use std::fmt::{self, Write};
4
5use crate::lexer::{LexerError, LexerLimitHit, Position, Token};
6
/// A failure raised while parsing, carrying where it happened and why.
///
/// The `Display` impl renders this as
/// `Parse error at <position>: <message>`, followed by
/// ` (expected: a, b, …)` when `expected` is non-empty.
#[derive(Debug, Clone)]
pub struct ParseError {
    /// Human-readable description of the failure.
    pub message: String,
    /// Position where the error occurred.
    pub position: Position,
    /// Expected tokens (for better error messages). Empty unless the
    /// error was built through [`ParseError::expected`].
    pub expected: Vec<String>,
    /// Structured category of the error. Always present:
    /// [`ParseError::new`] and [`ParseError::expected`] set it to
    /// [`ParseErrorKind::Syntax`]; the hardening / DoS constructors set
    /// their dedicated variant.
    pub kind: ParseErrorKind,
}
19
/// Categorical kind for a parse error.
///
/// Most parse errors are plain `Syntax` failures; the variants
/// below carry structured information for the parser-hardening
/// layer (issue #87) so callers can distinguish DoS-style refusals
/// from grammar errors without string matching.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseErrorKind {
    /// Generic syntax / semantic error.
    Syntax,
    /// Recursion-depth limit exceeded during parsing.
    DepthLimit {
        /// Name of the limit that was hit.
        limit_name: &'static str,
        /// Number reported next to `limit_name`; the error message
        /// renders the pair as `limit_name = value`.
        value: usize,
    },
    /// Input larger than the configured byte cap.
    InputTooLarge {
        /// Name of the limit that was hit.
        limit_name: &'static str,
        /// Number reported next to `limit_name`; the error message
        /// renders the pair as `limit_name = value bytes`.
        value: usize,
    },
    /// Identifier longer than the configured character cap.
    IdentifierTooLong {
        /// Name of the limit that was hit.
        limit_name: &'static str,
        /// Number reported next to `limit_name`; the error message
        /// renders the pair as `limit_name = value chars`.
        value: usize,
    },
    /// Parser consumed more tokens than the configured cap.
    TokenLimit {
        /// Name of the limit that was hit.
        limit_name: &'static str,
        /// Number reported next to `limit_name`; the error message
        /// renders the pair as `limit_name = value`.
        value: usize,
    },
    /// A literal value (integer / float) parsed cleanly but lies
    /// outside the semantic range expected for its slot — e.g.
    /// `MAX_SIZE 0`, `lat = 91.0`, `K = 0`, or a negative integer
    /// where a positive one is required. The structured payload lets
    /// the snapshot/property harness distinguish these from generic
    /// syntax errors without string matching.
    ValueOutOfRange {
        /// Stable slot name, e.g. `"MAX_SIZE"`, `"lat"`, `"radius"`.
        field: &'static str,
        /// Free-text constraint, e.g. `"must be > 0"`,
        /// `"must be in -90.0..=90.0"`.
        constraint: &'static str,
    },
    /// The lexer recognized this token, but the parser does not support
    /// it in the current grammar position.
    UnsupportedToken {
        /// Text of the rejected token, as rendered by `SafeTokenDisplay`.
        token: String,
    },
}
67
68impl ParseError {
69    /// Create a new parse error
70    pub fn new(message: impl Into<String>, position: Position) -> Self {
71        Self {
72            message: message.into(),
73            position,
74            expected: Vec::new(),
75            kind: ParseErrorKind::Syntax,
76        }
77    }
78
79    /// Create error with expected tokens
80    ///
81    /// `found` is rendered through [`SafeTokenDisplay`] so caller-controlled
82    /// bytes inside `Token::Ident` / `Token::String` / `Token::JsonLiteral` /
83    /// `Token::Float` / `Token::Integer` payloads are escaped via Rust's
84    /// `escape_debug` rules (CR / LF / NUL / quote bytes become `\n`,
85    /// `\r`, `\0`, `\"`, …). Static keyword and punctuation arms keep their
86    /// existing UPPER-CASE rendering so error messages and snapshot tests
87    /// stay readable. This prevents F-05 smuggling through the downstream
88    /// JSON / audit / log / gRPC sinks that embed `ParseError::message`.
89    pub fn expected(expected: Vec<&str>, found: &Token, position: Position) -> Self {
90        Self {
91            message: format!("Unexpected token: {}", SafeTokenDisplay(found)),
92            position,
93            expected: expected.into_iter().map(|s| s.to_string()).collect(),
94            kind: ParseErrorKind::Syntax,
95        }
96    }
97
98    /// Create an error when a lexer-known keyword appears in a parser
99    /// position where that keyword has no supported production.
100    pub fn unsupported_recognized_token(found: &Token, position: Position) -> Option<Self> {
101        let token = recognized_keyword_name(found)?;
102        Some(Self {
103            message: format!("token {token} is recognized but not supported in this position"),
104            position,
105            expected: Vec::new(),
106            kind: ParseErrorKind::UnsupportedToken { token },
107        })
108    }
109
110    /// Recursion depth limit hit. The structured `kind` carries the
111    /// name + numeric value so the snapshot/property harness can
112    /// pattern-match without string slicing.
113    pub fn depth_limit(limit_name: &'static str, value: usize, position: Position) -> Self {
114        Self {
115            message: format!(
116                "recursion depth limit exceeded ({} = {})",
117                limit_name, value
118            ),
119            position,
120            expected: Vec::new(),
121            kind: ParseErrorKind::DepthLimit { limit_name, value },
122        }
123    }
124
125    /// Input bytes exceeded the configured cap.
126    pub fn input_too_large(limit_name: &'static str, value: usize, position: Position) -> Self {
127        Self {
128            message: format!(
129                "input exceeds maximum size ({} = {} bytes)",
130                limit_name, value
131            ),
132            position,
133            expected: Vec::new(),
134            kind: ParseErrorKind::InputTooLarge { limit_name, value },
135        }
136    }
137
138    /// Identifier exceeded the configured character cap.
139    pub fn identifier_too_long(limit_name: &'static str, value: usize, position: Position) -> Self {
140        Self {
141            message: format!(
142                "identifier exceeds maximum length ({} = {} chars)",
143                limit_name, value
144            ),
145            position,
146            expected: Vec::new(),
147            kind: ParseErrorKind::IdentifierTooLong { limit_name, value },
148        }
149    }
150
151    /// Token budget exceeded during parsing.
152    pub fn token_limit(limit_name: &'static str, value: usize, position: Position) -> Self {
153        Self {
154            message: format!("parser token limit exceeded ({} = {})", limit_name, value),
155            position,
156            expected: Vec::new(),
157            kind: ParseErrorKind::TokenLimit { limit_name, value },
158        }
159    }
160
161    /// A literal value lies outside the allowed range for its slot.
162    /// The free-text `constraint` is included verbatim in the message
163    /// so callers can render a single line without re-formatting.
164    pub fn value_out_of_range(
165        field: &'static str,
166        constraint: &'static str,
167        position: Position,
168    ) -> Self {
169        Self {
170            message: format!("{} {}", field, constraint),
171            position,
172            expected: Vec::new(),
173            kind: ParseErrorKind::ValueOutOfRange { field, constraint },
174        }
175    }
176}
177
178fn recognized_keyword_name(token: &Token) -> Option<String> {
179    match token {
180        Token::String(_)
181        | Token::Integer(_)
182        | Token::Float(_)
183        | Token::JsonLiteral(_)
184        | Token::Ident(_)
185        | Token::Eq
186        | Token::Ne
187        | Token::Lt
188        | Token::Le
189        | Token::Gt
190        | Token::Ge
191        | Token::Plus
192        | Token::Minus
193        | Token::Star
194        | Token::Slash
195        | Token::Percent
196        | Token::LParen
197        | Token::RParen
198        | Token::LBracket
199        | Token::RBracket
200        | Token::LBrace
201        | Token::RBrace
202        | Token::Comma
203        | Token::Dot
204        | Token::Colon
205        | Token::Semi
206        | Token::Dollar
207        | Token::Question
208        | Token::Arrow
209        | Token::ArrowLeft
210        | Token::Dash
211        | Token::DotDot
212        | Token::Pipe
213        | Token::DoublePipe
214        | Token::Eof => None,
215        other => Some(SafeTokenDisplay(other).to_string()),
216    }
217}
218
219impl fmt::Display for ParseError {
220    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
221        write!(f, "Parse error at {}: {}", self.position, self.message)?;
222        if !self.expected.is_empty() {
223            write!(f, " (expected: {})", self.expected.join(", "))?;
224        }
225        Ok(())
226    }
227}
228
// Marker impl: `ParseError` supplies `Debug` (derived) and `Display`
// itself, and relies on the default `Error` methods for the rest.
impl std::error::Error for ParseError {}
230
/// `Display` adapter that emits a `Token` while escaping the
/// caller-controlled text payload of the `Ident` / `String` /
/// `JsonLiteral` arms.
///
/// F-05 (serialization-boundary audit, 2026-05-06): SQL parser error
/// messages flow into JSON HTTP bodies, JSONL audit rows, gRPC
/// `Status::message`, PG3 `ErrorResponse`, and `tracing::warn!` log
/// lines. The default `Token` Display arms emit raw user bytes for
/// `Token::Ident("foo\nbar")` etc., which lets a tenant smuggle CR /
/// LF / NUL / quote bytes through every downstream sink at once.
///
/// This adapter renders those three arms one character at a time
/// through `char::escape_debug`, and additionally wraps `String`
/// payloads in single quotes. `Integer`, `Float`, keyword and
/// punctuation arms are forwarded to the token's own `Display`
/// unchanged so existing snapshot tests and operator log readability
/// are preserved.
pub struct SafeTokenDisplay<'a>(pub &'a Token);
247
248impl fmt::Display for SafeTokenDisplay<'_> {
249    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
250        match self.0 {
251            // User-controlled byte payloads. Render via `escape_debug`
252            // so embedded CR / LF / NUL / quote bytes do not reach
253            // downstream serialization sinks unescaped.
254            Token::Ident(s) => write_escaped(f, s),
255            Token::String(s) => {
256                f.write_str("'")?;
257                write_escaped(f, s)?;
258                f.write_str("'")
259            }
260            Token::JsonLiteral(s) => write_escaped(f, s),
261            // Numeric tokens come straight from the lexer; their
262            // canonical Display form is bounded ASCII, but the lexer
263            // builds them via `to_string` so they cannot carry control
264            // bytes. Pass through Display.
265            Token::Integer(_) | Token::Float(_) => fmt::Display::fmt(self.0, f),
266            // Static keyword / punctuation arms — fall back to the
267            // existing Display output verbatim.
268            other => fmt::Display::fmt(other, f),
269        }
270    }
271}
272
/// Write `s` to `f`, expanding each character with `char::escape_debug`.
///
/// Control characters come out as `\n`, `\r`, `\0`, `\t`, …; backslashes
/// and quote characters gain a leading backslash; printable characters,
/// including non-ASCII ones, are written unchanged. Stops at the first
/// formatter error and returns it.
fn write_escaped(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result {
    // Escape per character rather than per string, so every character
    // gets the same treatment regardless of where it sits in `s`.
    s.chars()
        .flat_map(char::escape_debug)
        .try_for_each(|piece| f.write_char(piece))
}
285
286impl From<LexerError> for ParseError {
287    fn from(e: LexerError) -> Self {
288        let kind = match &e.limit_hit {
289            Some(LexerLimitHit::IdentifierTooLong { limit_name, value }) => {
290                ParseErrorKind::IdentifierTooLong {
291                    limit_name,
292                    value: *value,
293                }
294            }
295            None => ParseErrorKind::Syntax,
296        };
297        ParseError {
298            message: e.message,
299            position: e.position,
300            expected: Vec::new(),
301            kind,
302        }
303    }
304}