reddb_server/storage/query/parser/error.rs
//! Parser error types

use std::fmt::{self, Write};

use super::super::lexer::{LexerError, LexerLimitHit, Position, Token};

/// Parse error
#[derive(Debug, Clone)]
pub struct ParseError {
    /// Error message
    pub message: String,
    /// Position where error occurred
    pub position: Position,
    /// Expected tokens (for better error messages)
    pub expected: Vec<String>,
    /// Structured kind for hardening / DoS errors (`Syntax` for ordinary
    /// grammar errors)
    pub kind: ParseErrorKind,
}

/// Categorical kind for a parse error.
///
/// Most parse errors are plain `Syntax` failures; the variants
/// below carry structured information for the parser-hardening
/// layer (issue #87) so callers can distinguish DoS-style refusals
/// from grammar errors without string matching.
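///
/// A minimal sketch of how a caller might branch on the kind instead of
/// string-matching the message (the `err` variable is illustrative, not
/// part of this API):
///
/// ```ignore
/// match err.kind {
///     ParseErrorKind::DepthLimit { .. }
///     | ParseErrorKind::InputTooLarge { .. }
///     | ParseErrorKind::IdentifierTooLong { .. } => {
///         // resource-limit refusal from the hardening layer
///     }
///     _ => {
///         // ordinary grammar / semantic error
///     }
/// }
/// ```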
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseErrorKind {
    /// Generic syntax / semantic error.
    Syntax,
    /// Recursion-depth limit exceeded during parsing.
    DepthLimit {
        limit_name: &'static str,
        value: usize,
    },
    /// Input larger than the configured byte cap.
    InputTooLarge {
        limit_name: &'static str,
        value: usize,
    },
    /// Identifier longer than the configured character cap.
    IdentifierTooLong {
        limit_name: &'static str,
        value: usize,
    },
    /// A literal value (integer / float) parsed cleanly but lies
    /// outside the semantic range expected for its slot — e.g.
    /// `MAX_SIZE 0`, `lat = 91.0`, `K = 0`, or a negative integer
    /// where a positive one is required. The structured payload lets
    /// the snapshot/property harness distinguish these from generic
    /// syntax errors without string matching.
    ValueOutOfRange {
        /// Stable slot name, e.g. `"MAX_SIZE"`, `"lat"`, `"radius"`.
        field: &'static str,
        /// Free-text constraint, e.g. `"must be > 0"`,
        /// `"must be in -90.0..=90.0"`.
        constraint: &'static str,
    },
    /// The lexer recognized this token, but the parser does not support
    /// it in the current grammar position.
    UnsupportedToken { token: String },
}

impl ParseError {
    /// Create a new parse error
    pub fn new(message: impl Into<String>, position: Position) -> Self {
        Self {
            message: message.into(),
            position,
            expected: Vec::new(),
            kind: ParseErrorKind::Syntax,
        }
    }

    /// Create error with expected tokens
    ///
    /// `found` is rendered through [`SafeTokenDisplay`] so caller-controlled
    /// bytes inside `Token::Ident` / `Token::String` / `Token::JsonLiteral`
    /// payloads are escaped via Rust's `escape_debug` rules (CR / LF / NUL /
    /// quote bytes become `\n`, `\r`, `\0`, `\"`, …), while `Token::Integer`
    /// and `Token::Float` pass through their bounded-ASCII `Display` form.
    /// Static keyword and punctuation arms keep their existing UPPER-CASE
    /// rendering so error messages and snapshot tests stay readable. This
    /// prevents F-05 smuggling through the downstream JSON / audit / log /
    /// gRPC sinks that embed `ParseError::message`.
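    ///
    /// A rough sketch of the intended behavior (it assumes `Token::Ident`
    /// wraps an owned `String` and that `pos` is some valid `Position`;
    /// adjust both to the real definitions):
    ///
    /// ```ignore
    /// let err = ParseError::expected(
    ///     vec!["FROM"],
    ///     &Token::Ident("evil\nname".to_string()),
    ///     pos,
    /// );
    /// // The embedded LF is escaped, so the message stays on one line.
    /// assert_eq!(err.message, "Unexpected token: evil\\nname");
    /// ```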
    pub fn expected(expected: Vec<&str>, found: &Token, position: Position) -> Self {
        Self {
            message: format!("Unexpected token: {}", SafeTokenDisplay(found)),
            position,
            expected: expected.into_iter().map(|s| s.to_string()).collect(),
            kind: ParseErrorKind::Syntax,
        }
    }

    /// Create an error when a lexer-known keyword appears in a parser
    /// position where that keyword has no supported production.
    pub fn unsupported_recognized_token(found: &Token, position: Position) -> Option<Self> {
        let token = recognized_keyword_name(found)?;
        Some(Self {
            message: format!("token {token} is recognized but not supported in this position"),
            position,
            expected: Vec::new(),
            kind: ParseErrorKind::UnsupportedToken { token },
        })
    }

    /// Recursion depth limit hit. The structured `kind` carries the
    /// name + numeric value so the snapshot/property harness can
    /// pattern-match without string slicing.
    pub fn depth_limit(limit_name: &'static str, value: usize, position: Position) -> Self {
        Self {
            message: format!(
                "recursion depth limit exceeded ({} = {})",
                limit_name, value
            ),
            position,
            expected: Vec::new(),
            kind: ParseErrorKind::DepthLimit { limit_name, value },
        }
    }

    /// Input bytes exceeded the configured cap.
    pub fn input_too_large(limit_name: &'static str, value: usize, position: Position) -> Self {
        Self {
            message: format!(
                "input exceeds maximum size ({} = {} bytes)",
                limit_name, value
            ),
            position,
            expected: Vec::new(),
            kind: ParseErrorKind::InputTooLarge { limit_name, value },
        }
    }

    /// Identifier exceeded the configured character cap.
    pub fn identifier_too_long(limit_name: &'static str, value: usize, position: Position) -> Self {
        Self {
            message: format!(
                "identifier exceeds maximum length ({} = {} chars)",
                limit_name, value
            ),
            position,
            expected: Vec::new(),
            kind: ParseErrorKind::IdentifierTooLong { limit_name, value },
        }
    }

    /// A literal value lies outside the allowed range for its slot.
    /// The free-text `constraint` is included verbatim in the message
    /// so callers can render a single line without re-formatting.
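    ///
    /// The message is simply `"{field} {constraint}"`; a minimal sketch
    /// (`pos` stands in for a real `Position`):
    ///
    /// ```ignore
    /// let err = ParseError::value_out_of_range("lat", "must be in -90.0..=90.0", pos);
    /// assert_eq!(err.message, "lat must be in -90.0..=90.0");
    /// ```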
    pub fn value_out_of_range(
        field: &'static str,
        constraint: &'static str,
        position: Position,
    ) -> Self {
        Self {
            message: format!("{} {}", field, constraint),
            position,
            expected: Vec::new(),
            kind: ParseErrorKind::ValueOutOfRange { field, constraint },
        }
    }
}

fn recognized_keyword_name(token: &Token) -> Option<String> {
    match token {
        Token::String(_)
        | Token::Integer(_)
        | Token::Float(_)
        | Token::JsonLiteral(_)
        | Token::Ident(_)
        | Token::Eq
        | Token::Ne
        | Token::Lt
        | Token::Le
        | Token::Gt
        | Token::Ge
        | Token::Plus
        | Token::Minus
        | Token::Star
        | Token::Slash
        | Token::Percent
        | Token::LParen
        | Token::RParen
        | Token::LBracket
        | Token::RBracket
        | Token::LBrace
        | Token::RBrace
        | Token::Comma
        | Token::Dot
        | Token::Colon
        | Token::Semi
        | Token::Dollar
        | Token::Question
        | Token::Arrow
        | Token::ArrowLeft
        | Token::Dash
        | Token::DotDot
        | Token::Pipe
        | Token::DoublePipe
        | Token::Eof => None,
        other => Some(SafeTokenDisplay(other).to_string()),
    }
}

impl fmt::Display for ParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "Parse error at {}: {}", self.position, self.message)?;
        if !self.expected.is_empty() {
            write!(f, " (expected: {})", self.expected.join(", "))?;
        }
        Ok(())
    }
}

impl std::error::Error for ParseError {}

/// `Display` adapter that emits a `Token` while escaping the
/// caller-controlled byte payloads of the `Ident` / `String` /
/// `JsonLiteral` arms (numeric arms pass through their bounded-ASCII
/// `Display` form).
///
/// F-05 (serialization-boundary audit, 2026-05-06): SQL parser error
/// messages flow into JSON HTTP bodies, JSONL audit rows, gRPC
/// `Status::message`, PG3 `ErrorResponse`, and `tracing::warn!` log
/// lines. The default `Token` Display arms emit raw user bytes for
/// `Token::Ident("foo\nbar")` etc., which lets a tenant smuggle CR /
/// LF / NUL / quote bytes through every downstream sink at once.
///
/// This adapter renders user-controlled arms via `escape_debug` (the
/// same rules `{:?}` applies to a `&str`) and leaves keyword /
/// punctuation arms untouched so existing snapshot tests and operator
/// log readability are preserved.
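///
/// A rough sketch of the intended rendering (it assumes `Token::Ident`
/// wraps an owned `String`; adjust to the real `Token` definition):
///
/// ```ignore
/// let ident = Token::Ident("a\r\nb".to_string());
/// // CR / LF are escaped, so the rendered token stays on one line.
/// assert_eq!(SafeTokenDisplay(&ident).to_string(), "a\\r\\nb");
/// ```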
pub struct SafeTokenDisplay<'a>(pub &'a Token);

impl fmt::Display for SafeTokenDisplay<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.0 {
            // User-controlled byte payloads. Render via `escape_debug`
            // so embedded CR / LF / NUL / quote bytes do not reach
            // downstream serialization sinks unescaped.
            Token::Ident(s) => write_escaped(f, s),
            Token::String(s) => {
                f.write_str("'")?;
                write_escaped(f, s)?;
                f.write_str("'")
            }
            Token::JsonLiteral(s) => write_escaped(f, s),
            // Numeric tokens come straight from the lexer, which builds
            // them via `to_string`, so their canonical Display form is
            // bounded ASCII and cannot carry control bytes. Pass them
            // through Display unchanged.
            Token::Integer(_) | Token::Float(_) => fmt::Display::fmt(self.0, f),
            // Static keyword / punctuation arms — fall back to the
            // existing Display output verbatim.
            other => fmt::Display::fmt(other, f),
        }
    }
}

fn write_escaped(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result {
    for ch in s.chars() {
        // `escape_debug` matches Rust's Debug rules: ASCII control
        // bytes become `\n`, `\r`, `\0`, `\t`, …; non-ASCII printable
        // characters pass through; backslash and double-quote are
        // escaped.
        for esc in ch.escape_debug() {
            f.write_char(esc)?;
        }
    }
    Ok(())
}

impl From<LexerError> for ParseError {
    fn from(e: LexerError) -> Self {
        let kind = match &e.limit_hit {
            Some(LexerLimitHit::IdentifierTooLong { limit_name, value }) => {
                // The match binds the fields by reference, so copy the
                // `&'static str` and `usize` out of the borrowed limit hit.
                ParseErrorKind::IdentifierTooLong {
                    limit_name: *limit_name,
                    value: *value,
                }
            }
            None => ParseErrorKind::Syntax,
        };
        ParseError {
            message: e.message,
            position: e.position,
            expected: Vec::new(),
            kind,
        }
    }
}