Skip to main content

reddb_server/storage/query/parser/
error.rs

1//! Parser error types
2
3use std::fmt::{self, Write};
4
5use super::super::lexer::{LexerError, LexerLimitHit, Position, Token};
6
7/// Parse error
8#[derive(Debug, Clone)]
9pub struct ParseError {
10    /// Error message
11    pub message: String,
12    /// Position where error occurred
13    pub position: Position,
14    /// Expected tokens (for better error messages)
15    pub expected: Vec<String>,
16    /// Optional structured kind for hardening / DoS errors
17    pub kind: ParseErrorKind,
18}
19
/// Machine-readable classification of a parse error.
///
/// Ordinary grammar failures are reported as `Syntax`. The remaining
/// variants carry structured payloads for the parser-hardening layer
/// (issue #87), so callers can tell DoS-style refusals apart from
/// grammar errors without matching on message text.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseErrorKind {
    /// Generic syntax / semantic error with no structured payload.
    Syntax,
    /// The parser exceeded its recursion-depth limit.
    DepthLimit {
        /// Name of the limit that was hit.
        limit_name: &'static str,
        /// Numeric value reported alongside the limit name.
        value: usize,
    },
    /// The input was larger than the configured byte cap.
    InputTooLarge {
        /// Name of the limit that was hit.
        limit_name: &'static str,
        /// Numeric value reported alongside the limit name (bytes).
        value: usize,
    },
    /// An identifier was longer than the configured character cap.
    IdentifierTooLong {
        /// Name of the limit that was hit.
        limit_name: &'static str,
        /// Numeric value reported alongside the limit name (chars).
        value: usize,
    },
    /// A literal (integer / float) parsed cleanly but falls outside the
    /// semantic range its slot accepts — for example `MAX_SIZE 0`,
    /// `lat = 91.0`, `K = 0`, or a negative integer where a positive one
    /// is required. The payload lets the snapshot/property harness
    /// separate these from generic syntax errors without string matching.
    ValueOutOfRange {
        /// Stable slot name, e.g. `"MAX_SIZE"`, `"lat"`, `"radius"`.
        field: &'static str,
        /// Free-text constraint, e.g. `"must be > 0"` or
        /// `"must be in -90.0..=90.0"`.
        constraint: &'static str,
    },
    /// The lexer recognized the token, but the parser has no supported
    /// production for it at the current grammar position.
    UnsupportedToken {
        /// Rendered name of the offending token.
        token: String,
    },
}
62
63impl ParseError {
64    /// Create a new parse error
65    pub fn new(message: impl Into<String>, position: Position) -> Self {
66        Self {
67            message: message.into(),
68            position,
69            expected: Vec::new(),
70            kind: ParseErrorKind::Syntax,
71        }
72    }
73
74    /// Create error with expected tokens
75    ///
76    /// `found` is rendered through [`SafeTokenDisplay`] so caller-controlled
77    /// bytes inside `Token::Ident` / `Token::String` / `Token::JsonLiteral` /
78    /// `Token::Float` / `Token::Integer` payloads are escaped via Rust's
79    /// `escape_debug` rules (CR / LF / NUL / quote bytes become `\n`,
80    /// `\r`, `\0`, `\"`, …). Static keyword and punctuation arms keep their
81    /// existing UPPER-CASE rendering so error messages and snapshot tests
82    /// stay readable. This prevents F-05 smuggling through the downstream
83    /// JSON / audit / log / gRPC sinks that embed `ParseError::message`.
84    pub fn expected(expected: Vec<&str>, found: &Token, position: Position) -> Self {
85        Self {
86            message: format!("Unexpected token: {}", SafeTokenDisplay(found)),
87            position,
88            expected: expected.into_iter().map(|s| s.to_string()).collect(),
89            kind: ParseErrorKind::Syntax,
90        }
91    }
92
93    /// Create an error when a lexer-known keyword appears in a parser
94    /// position where that keyword has no supported production.
95    pub fn unsupported_recognized_token(found: &Token, position: Position) -> Option<Self> {
96        let token = recognized_keyword_name(found)?;
97        Some(Self {
98            message: format!("token {token} is recognized but not supported in this position"),
99            position,
100            expected: Vec::new(),
101            kind: ParseErrorKind::UnsupportedToken { token },
102        })
103    }
104
105    /// Recursion depth limit hit. The structured `kind` carries the
106    /// name + numeric value so the snapshot/property harness can
107    /// pattern-match without string slicing.
108    pub fn depth_limit(limit_name: &'static str, value: usize, position: Position) -> Self {
109        Self {
110            message: format!(
111                "recursion depth limit exceeded ({} = {})",
112                limit_name, value
113            ),
114            position,
115            expected: Vec::new(),
116            kind: ParseErrorKind::DepthLimit { limit_name, value },
117        }
118    }
119
120    /// Input bytes exceeded the configured cap.
121    pub fn input_too_large(limit_name: &'static str, value: usize, position: Position) -> Self {
122        Self {
123            message: format!(
124                "input exceeds maximum size ({} = {} bytes)",
125                limit_name, value
126            ),
127            position,
128            expected: Vec::new(),
129            kind: ParseErrorKind::InputTooLarge { limit_name, value },
130        }
131    }
132
133    /// Identifier exceeded the configured character cap.
134    pub fn identifier_too_long(limit_name: &'static str, value: usize, position: Position) -> Self {
135        Self {
136            message: format!(
137                "identifier exceeds maximum length ({} = {} chars)",
138                limit_name, value
139            ),
140            position,
141            expected: Vec::new(),
142            kind: ParseErrorKind::IdentifierTooLong { limit_name, value },
143        }
144    }
145
146    /// A literal value lies outside the allowed range for its slot.
147    /// The free-text `constraint` is included verbatim in the message
148    /// so callers can render a single line without re-formatting.
149    pub fn value_out_of_range(
150        field: &'static str,
151        constraint: &'static str,
152        position: Position,
153    ) -> Self {
154        Self {
155            message: format!("{} {}", field, constraint),
156            position,
157            expected: Vec::new(),
158            kind: ParseErrorKind::ValueOutOfRange { field, constraint },
159        }
160    }
161}
162
163fn recognized_keyword_name(token: &Token) -> Option<String> {
164    match token {
165        Token::String(_)
166        | Token::Integer(_)
167        | Token::Float(_)
168        | Token::JsonLiteral(_)
169        | Token::Ident(_)
170        | Token::Eq
171        | Token::Ne
172        | Token::Lt
173        | Token::Le
174        | Token::Gt
175        | Token::Ge
176        | Token::Plus
177        | Token::Minus
178        | Token::Star
179        | Token::Slash
180        | Token::Percent
181        | Token::LParen
182        | Token::RParen
183        | Token::LBracket
184        | Token::RBracket
185        | Token::LBrace
186        | Token::RBrace
187        | Token::Comma
188        | Token::Dot
189        | Token::Colon
190        | Token::Semi
191        | Token::Dollar
192        | Token::Question
193        | Token::Arrow
194        | Token::ArrowLeft
195        | Token::Dash
196        | Token::DotDot
197        | Token::Pipe
198        | Token::DoublePipe
199        | Token::Eof => None,
200        other => Some(SafeTokenDisplay(other).to_string()),
201    }
202}
203
204impl fmt::Display for ParseError {
205    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
206        write!(f, "Parse error at {}: {}", self.position, self.message)?;
207        if !self.expected.is_empty() {
208            write!(f, " (expected: {})", self.expected.join(", "))?;
209        }
210        Ok(())
211    }
212}
213
214impl std::error::Error for ParseError {}
215
216/// `Display` adapter that emits a `Token` while escaping the
217/// caller-controlled byte payload of `Ident` / `String` / `JsonLiteral` /
218/// `Integer` / `Float` arms.
219///
220/// F-05 (serialization-boundary audit, 2026-05-06): SQL parser error
221/// messages flow into JSON HTTP bodies, JSONL audit rows, gRPC
222/// `Status::message`, PG3 `ErrorResponse`, and `tracing::warn!` log
223/// lines. The default `Token` Display arms emit raw user bytes for
224/// `Token::Ident("foo\nbar")` etc., which lets a tenant smuggle CR /
225/// LF / NUL / quote bytes through every downstream sink at once.
226///
227/// This adapter renders user-controlled arms via `escape_debug` (the
228/// same rules `{:?}` applies to a `&str`) and leaves keyword /
229/// punctuation arms untouched so existing snapshot tests and operator
230/// log readability are preserved.
231pub struct SafeTokenDisplay<'a>(pub &'a Token);
232
233impl fmt::Display for SafeTokenDisplay<'_> {
234    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
235        match self.0 {
236            // User-controlled byte payloads. Render via `escape_debug`
237            // so embedded CR / LF / NUL / quote bytes do not reach
238            // downstream serialization sinks unescaped.
239            Token::Ident(s) => write_escaped(f, s),
240            Token::String(s) => {
241                f.write_str("'")?;
242                write_escaped(f, s)?;
243                f.write_str("'")
244            }
245            Token::JsonLiteral(s) => write_escaped(f, s),
246            // Numeric tokens come straight from the lexer; their
247            // canonical Display form is bounded ASCII, but the lexer
248            // builds them via `to_string` so they cannot carry control
249            // bytes. Pass through Display.
250            Token::Integer(_) | Token::Float(_) => fmt::Display::fmt(self.0, f),
251            // Static keyword / punctuation arms — fall back to the
252            // existing Display output verbatim.
253            other => fmt::Display::fmt(other, f),
254        }
255    }
256}
257
/// Writes `s` to `f` with every character passed through
/// `char::escape_debug`.
///
/// Escaping is applied per character, following Rust's Debug rules:
/// control characters become `\n`, `\r`, `\0`, `\t`, …; backslash,
/// double quote and single quote are backslash-escaped; printable
/// non-ASCII characters pass through unchanged. Stops at, and returns,
/// the first formatter error.
fn write_escaped(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result {
    s.chars()
        .flat_map(char::escape_debug)
        .try_for_each(|escaped| f.write_char(escaped))
}
270
271impl From<LexerError> for ParseError {
272    fn from(e: LexerError) -> Self {
273        let kind = match &e.limit_hit {
274            Some(LexerLimitHit::IdentifierTooLong { limit_name, value }) => {
275                ParseErrorKind::IdentifierTooLong {
276                    limit_name,
277                    value: *value,
278                }
279            }
280            None => ParseErrorKind::Syntax,
281        };
282        ParseError {
283            message: e.message,
284            position: e.position,
285            expected: Vec::new(),
286            kind,
287        }
288    }
289}