reddb_rql/parser/error.rs
1//! Parser error types
2
3use std::fmt::{self, Write};
4
5use crate::lexer::{LexerError, LexerLimitHit, Position, Token};
6
/// Error produced when parsing fails.
///
/// Bundles a human-readable message, the source position, an optional
/// list of expected tokens, and a structured [`ParseErrorKind`] so callers
/// can categorise the failure without inspecting the message text.
#[derive(Debug, Clone)]
pub struct ParseError {
    /// Error message
    pub message: String,
    /// Position where error occurred
    pub position: Position,
    /// Expected tokens (for better error messages). Empty when the
    /// constructor did not supply any.
    pub expected: Vec<String>,
    /// Structured kind for hardening / DoS errors. This field is always
    /// populated; plain grammar failures carry [`ParseErrorKind::Syntax`].
    pub kind: ParseErrorKind,
}
19
/// Categorical kind for a parse error.
///
/// Most parse errors are plain `Syntax` failures; the variants
/// below carry structured information for the parser-hardening
/// layer (issue #87) so callers can distinguish DoS-style refusals
/// from grammar errors without string matching.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseErrorKind {
    /// Generic syntax / semantic error.
    Syntax,
    /// Recursion-depth limit exceeded during parsing.
    DepthLimit {
        /// Name of the limit that was hit.
        limit_name: &'static str,
        /// Numeric value reported next to `limit_name` — presumably the
        /// configured limit rather than the observed depth; TODO confirm
        /// against the call sites.
        value: usize,
    },
    /// Input larger than the configured byte cap.
    InputTooLarge {
        /// Name of the limit that was hit.
        limit_name: &'static str,
        /// Numeric value reported next to `limit_name`, in bytes —
        /// presumably the configured cap; TODO confirm.
        value: usize,
    },
    /// Identifier longer than the configured character cap.
    IdentifierTooLong {
        /// Name of the limit that was hit.
        limit_name: &'static str,
        /// Numeric value reported next to `limit_name`, in characters —
        /// presumably the configured cap; TODO confirm.
        value: usize,
    },
    /// Parser consumed more tokens than the configured cap.
    TokenLimit {
        /// Name of the limit that was hit.
        limit_name: &'static str,
        /// Numeric value reported next to `limit_name` — presumably the
        /// configured token budget; TODO confirm.
        value: usize,
    },
    /// A literal value (integer / float) parsed cleanly but lies
    /// outside the semantic range expected for its slot — e.g.
    /// `MAX_SIZE 0`, `lat = 91.0`, `K = 0`, or a negative integer
    /// where a positive one is required. The structured payload lets
    /// the snapshot/property harness distinguish these from generic
    /// syntax errors without string matching.
    ValueOutOfRange {
        /// Stable slot name, e.g. `"MAX_SIZE"`, `"lat"`, `"radius"`.
        field: &'static str,
        /// Free-text constraint, e.g. `"must be > 0"`,
        /// `"must be in -90.0..=90.0"`.
        constraint: &'static str,
    },
    /// The lexer recognized this token, but the parser does not support
    /// it in the current grammar position. `token` holds the rendered
    /// name of the offending token.
    UnsupportedToken { token: String },
}
67
68impl ParseError {
69 /// Create a new parse error
70 pub fn new(message: impl Into<String>, position: Position) -> Self {
71 Self {
72 message: message.into(),
73 position,
74 expected: Vec::new(),
75 kind: ParseErrorKind::Syntax,
76 }
77 }
78
79 /// Create error with expected tokens
80 ///
81 /// `found` is rendered through [`SafeTokenDisplay`] so caller-controlled
82 /// bytes inside `Token::Ident` / `Token::String` / `Token::JsonLiteral` /
83 /// `Token::Float` / `Token::Integer` payloads are escaped via Rust's
84 /// `escape_debug` rules (CR / LF / NUL / quote bytes become `\n`,
85 /// `\r`, `\0`, `\"`, …). Static keyword and punctuation arms keep their
86 /// existing UPPER-CASE rendering so error messages and snapshot tests
87 /// stay readable. This prevents F-05 smuggling through the downstream
88 /// JSON / audit / log / gRPC sinks that embed `ParseError::message`.
89 pub fn expected(expected: Vec<&str>, found: &Token, position: Position) -> Self {
90 Self {
91 message: format!("Unexpected token: {}", SafeTokenDisplay(found)),
92 position,
93 expected: expected.into_iter().map(|s| s.to_string()).collect(),
94 kind: ParseErrorKind::Syntax,
95 }
96 }
97
98 /// Create an error when a lexer-known keyword appears in a parser
99 /// position where that keyword has no supported production.
100 pub fn unsupported_recognized_token(found: &Token, position: Position) -> Option<Self> {
101 let token = recognized_keyword_name(found)?;
102 Some(Self {
103 message: format!("token {token} is recognized but not supported in this position"),
104 position,
105 expected: Vec::new(),
106 kind: ParseErrorKind::UnsupportedToken { token },
107 })
108 }
109
110 /// Recursion depth limit hit. The structured `kind` carries the
111 /// name + numeric value so the snapshot/property harness can
112 /// pattern-match without string slicing.
113 pub fn depth_limit(limit_name: &'static str, value: usize, position: Position) -> Self {
114 Self {
115 message: format!(
116 "recursion depth limit exceeded ({} = {})",
117 limit_name, value
118 ),
119 position,
120 expected: Vec::new(),
121 kind: ParseErrorKind::DepthLimit { limit_name, value },
122 }
123 }
124
125 /// Input bytes exceeded the configured cap.
126 pub fn input_too_large(limit_name: &'static str, value: usize, position: Position) -> Self {
127 Self {
128 message: format!(
129 "input exceeds maximum size ({} = {} bytes)",
130 limit_name, value
131 ),
132 position,
133 expected: Vec::new(),
134 kind: ParseErrorKind::InputTooLarge { limit_name, value },
135 }
136 }
137
138 /// Identifier exceeded the configured character cap.
139 pub fn identifier_too_long(limit_name: &'static str, value: usize, position: Position) -> Self {
140 Self {
141 message: format!(
142 "identifier exceeds maximum length ({} = {} chars)",
143 limit_name, value
144 ),
145 position,
146 expected: Vec::new(),
147 kind: ParseErrorKind::IdentifierTooLong { limit_name, value },
148 }
149 }
150
151 /// Token budget exceeded during parsing.
152 pub fn token_limit(limit_name: &'static str, value: usize, position: Position) -> Self {
153 Self {
154 message: format!("parser token limit exceeded ({} = {})", limit_name, value),
155 position,
156 expected: Vec::new(),
157 kind: ParseErrorKind::TokenLimit { limit_name, value },
158 }
159 }
160
161 /// A literal value lies outside the allowed range for its slot.
162 /// The free-text `constraint` is included verbatim in the message
163 /// so callers can render a single line without re-formatting.
164 pub fn value_out_of_range(
165 field: &'static str,
166 constraint: &'static str,
167 position: Position,
168 ) -> Self {
169 Self {
170 message: format!("{} {}", field, constraint),
171 position,
172 expected: Vec::new(),
173 kind: ParseErrorKind::ValueOutOfRange { field, constraint },
174 }
175 }
176}
177
178fn recognized_keyword_name(token: &Token) -> Option<String> {
179 match token {
180 Token::String(_)
181 | Token::Integer(_)
182 | Token::Float(_)
183 | Token::JsonLiteral(_)
184 | Token::Ident(_)
185 | Token::Eq
186 | Token::Ne
187 | Token::Lt
188 | Token::Le
189 | Token::Gt
190 | Token::Ge
191 | Token::Plus
192 | Token::Minus
193 | Token::Star
194 | Token::Slash
195 | Token::Percent
196 | Token::LParen
197 | Token::RParen
198 | Token::LBracket
199 | Token::RBracket
200 | Token::LBrace
201 | Token::RBrace
202 | Token::Comma
203 | Token::Dot
204 | Token::Colon
205 | Token::Semi
206 | Token::Dollar
207 | Token::Question
208 | Token::Arrow
209 | Token::ArrowLeft
210 | Token::Dash
211 | Token::DotDot
212 | Token::Pipe
213 | Token::DoublePipe
214 | Token::Eof => None,
215 other => Some(SafeTokenDisplay(other).to_string()),
216 }
217}
218
219impl fmt::Display for ParseError {
220 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
221 write!(f, "Parse error at {}: {}", self.position, self.message)?;
222 if !self.expected.is_empty() {
223 write!(f, " (expected: {})", self.expected.join(", "))?;
224 }
225 Ok(())
226 }
227}
228
// Marker impl: no trait methods are overridden, so the standard library's
// default `Error` behaviour applies and no underlying `source()` is reported.
impl std::error::Error for ParseError {}
230
/// `Display` adapter that emits a `Token` while escaping the
/// caller-controlled text payload of the `Ident` / `String` /
/// `JsonLiteral` arms. All other arms, including `Integer` / `Float`,
/// are forwarded to the token's own `Display` output unchanged.
///
/// F-05 (serialization-boundary audit, 2026-05-06): SQL parser error
/// messages flow into JSON HTTP bodies, JSONL audit rows, gRPC
/// `Status::message`, PG3 `ErrorResponse`, and `tracing::warn!` log
/// lines. The default `Token` Display arms emit raw user bytes for
/// `Token::Ident("foo\nbar")` etc., which lets a tenant smuggle CR /
/// LF / NUL / quote bytes through every downstream sink at once.
///
/// This adapter renders user-controlled arms via `char::escape_debug`,
/// applied to each character in turn, and leaves keyword / punctuation
/// arms untouched so existing snapshot tests and operator log
/// readability are preserved.
pub struct SafeTokenDisplay<'a>(pub &'a Token);
247
248impl fmt::Display for SafeTokenDisplay<'_> {
249 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
250 match self.0 {
251 // User-controlled byte payloads. Render via `escape_debug`
252 // so embedded CR / LF / NUL / quote bytes do not reach
253 // downstream serialization sinks unescaped.
254 Token::Ident(s) => write_escaped(f, s),
255 Token::String(s) => {
256 f.write_str("'")?;
257 write_escaped(f, s)?;
258 f.write_str("'")
259 }
260 Token::JsonLiteral(s) => write_escaped(f, s),
261 // Numeric tokens come straight from the lexer; their
262 // canonical Display form is bounded ASCII, but the lexer
263 // builds them via `to_string` so they cannot carry control
264 // bytes. Pass through Display.
265 Token::Integer(_) | Token::Float(_) => fmt::Display::fmt(self.0, f),
266 // Static keyword / punctuation arms — fall back to the
267 // existing Display output verbatim.
268 other => fmt::Display::fmt(other, f),
269 }
270 }
271}
272
/// Write `s` to `f` with every character passed through
/// `char::escape_debug`.
///
/// Control characters come out as `\n`, `\r`, `\0`, `\t`, …; backslash
/// and quote characters gain a leading backslash; printable non-ASCII
/// characters are written unchanged. Escaping is done per character
/// (not via `str::escape_debug`) and stops at the first formatter error.
fn write_escaped(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result {
    s.chars()
        .flat_map(char::escape_debug)
        .try_for_each(|escaped| f.write_char(escaped))
}
285
286impl From<LexerError> for ParseError {
287 fn from(e: LexerError) -> Self {
288 let kind = match &e.limit_hit {
289 Some(LexerLimitHit::IdentifierTooLong { limit_name, value }) => {
290 ParseErrorKind::IdentifierTooLong {
291 limit_name,
292 value: *value,
293 }
294 }
295 None => ParseErrorKind::Syntax,
296 };
297 ParseError {
298 message: e.message,
299 position: e.position,
300 expected: Vec::new(),
301 kind,
302 }
303 }
304}