reddb_server/storage/query/parser/error.rs
1//! Parser error types
2
3use std::fmt::{self, Write};
4
5use super::super::lexer::{LexerError, LexerLimitHit, Position, Token};
6
7/// Parse error
8#[derive(Debug, Clone)]
9pub struct ParseError {
10 /// Error message
11 pub message: String,
12 /// Position where error occurred
13 pub position: Position,
14 /// Expected tokens (for better error messages)
15 pub expected: Vec<String>,
16 /// Optional structured kind for hardening / DoS errors
17 pub kind: ParseErrorKind,
18}
19
/// Structured classification of a [`ParseError`].
///
/// `Syntax` covers ordinary grammar failures. The remaining variants
/// carry machine-readable payloads for the parser-hardening layer
/// (issue #87), so callers can tell DoS-style refusals and range
/// violations apart from grammar errors without inspecting the
/// message text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseErrorKind {
    /// Ordinary syntax / semantic error with no structured payload.
    Syntax,
    /// The parser exceeded its recursion-depth limit.
    DepthLimit {
        /// Name of the limit that was hit.
        limit_name: &'static str,
        /// Numeric value reported alongside the limit name.
        value: usize,
    },
    /// The input is larger than the configured byte cap.
    InputTooLarge {
        /// Name of the limit that was hit.
        limit_name: &'static str,
        /// Numeric value reported alongside the limit name.
        value: usize,
    },
    /// An identifier is longer than the configured character cap.
    IdentifierTooLong {
        /// Name of the limit that was hit.
        limit_name: &'static str,
        /// Numeric value reported alongside the limit name.
        value: usize,
    },
    /// A literal (integer / float) parsed successfully but falls
    /// outside the semantic range of the slot it was used in, e.g.
    /// `MAX_SIZE 0`, `lat = 91.0`, `K = 0`, or a negative integer where
    /// a positive one is required. The payload allows the
    /// snapshot/property harness to recognise these without string
    /// matching.
    ValueOutOfRange {
        /// Stable name of the slot, e.g. `"MAX_SIZE"`, `"lat"`,
        /// `"radius"`.
        field: &'static str,
        /// Free-text description of the constraint, e.g.
        /// `"must be > 0"` or `"must be in -90.0..=90.0"`.
        constraint: &'static str,
    },
}
59
60impl ParseError {
61 /// Create a new parse error
62 pub fn new(message: impl Into<String>, position: Position) -> Self {
63 Self {
64 message: message.into(),
65 position,
66 expected: Vec::new(),
67 kind: ParseErrorKind::Syntax,
68 }
69 }
70
71 /// Create error with expected tokens
72 ///
73 /// `found` is rendered through [`SafeTokenDisplay`] so caller-controlled
74 /// bytes inside `Token::Ident` / `Token::String` / `Token::JsonLiteral` /
75 /// `Token::Float` / `Token::Integer` payloads are escaped via Rust's
76 /// `escape_debug` rules (CR / LF / NUL / quote bytes become `\n`,
77 /// `\r`, `\0`, `\"`, …). Static keyword and punctuation arms keep their
78 /// existing UPPER-CASE rendering so error messages and snapshot tests
79 /// stay readable. This prevents F-05 smuggling through the downstream
80 /// JSON / audit / log / gRPC sinks that embed `ParseError::message`.
81 pub fn expected(expected: Vec<&str>, found: &Token, position: Position) -> Self {
82 Self {
83 message: format!("Unexpected token: {}", SafeTokenDisplay(found)),
84 position,
85 expected: expected.into_iter().map(|s| s.to_string()).collect(),
86 kind: ParseErrorKind::Syntax,
87 }
88 }
89
90 /// Recursion depth limit hit. The structured `kind` carries the
91 /// name + numeric value so the snapshot/property harness can
92 /// pattern-match without string slicing.
93 pub fn depth_limit(limit_name: &'static str, value: usize, position: Position) -> Self {
94 Self {
95 message: format!(
96 "recursion depth limit exceeded ({} = {})",
97 limit_name, value
98 ),
99 position,
100 expected: Vec::new(),
101 kind: ParseErrorKind::DepthLimit { limit_name, value },
102 }
103 }
104
105 /// Input bytes exceeded the configured cap.
106 pub fn input_too_large(limit_name: &'static str, value: usize, position: Position) -> Self {
107 Self {
108 message: format!(
109 "input exceeds maximum size ({} = {} bytes)",
110 limit_name, value
111 ),
112 position,
113 expected: Vec::new(),
114 kind: ParseErrorKind::InputTooLarge { limit_name, value },
115 }
116 }
117
118 /// Identifier exceeded the configured character cap.
119 pub fn identifier_too_long(limit_name: &'static str, value: usize, position: Position) -> Self {
120 Self {
121 message: format!(
122 "identifier exceeds maximum length ({} = {} chars)",
123 limit_name, value
124 ),
125 position,
126 expected: Vec::new(),
127 kind: ParseErrorKind::IdentifierTooLong { limit_name, value },
128 }
129 }
130
131 /// A literal value lies outside the allowed range for its slot.
132 /// The free-text `constraint` is included verbatim in the message
133 /// so callers can render a single line without re-formatting.
134 pub fn value_out_of_range(
135 field: &'static str,
136 constraint: &'static str,
137 position: Position,
138 ) -> Self {
139 Self {
140 message: format!("{} {}", field, constraint),
141 position,
142 expected: Vec::new(),
143 kind: ParseErrorKind::ValueOutOfRange { field, constraint },
144 }
145 }
146}
147
148impl fmt::Display for ParseError {
149 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
150 write!(f, "Parse error at {}: {}", self.position, self.message)?;
151 if !self.expected.is_empty() {
152 write!(f, " (expected: {})", self.expected.join(", "))?;
153 }
154 Ok(())
155 }
156}
157
158impl std::error::Error for ParseError {}
159
160/// `Display` adapter that emits a `Token` while escaping the
161/// caller-controlled byte payload of `Ident` / `String` / `JsonLiteral` /
162/// `Integer` / `Float` arms.
163///
164/// F-05 (serialization-boundary audit, 2026-05-06): SQL parser error
165/// messages flow into JSON HTTP bodies, JSONL audit rows, gRPC
166/// `Status::message`, PG3 `ErrorResponse`, and `tracing::warn!` log
167/// lines. The default `Token` Display arms emit raw user bytes for
168/// `Token::Ident("foo\nbar")` etc., which lets a tenant smuggle CR /
169/// LF / NUL / quote bytes through every downstream sink at once.
170///
171/// This adapter renders user-controlled arms via `escape_debug` (the
172/// same rules `{:?}` applies to a `&str`) and leaves keyword /
173/// punctuation arms untouched so existing snapshot tests and operator
174/// log readability are preserved.
175pub struct SafeTokenDisplay<'a>(pub &'a Token);
176
177impl fmt::Display for SafeTokenDisplay<'_> {
178 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
179 match self.0 {
180 // User-controlled byte payloads. Render via `escape_debug`
181 // so embedded CR / LF / NUL / quote bytes do not reach
182 // downstream serialization sinks unescaped.
183 Token::Ident(s) => write_escaped(f, s),
184 Token::String(s) => {
185 f.write_str("'")?;
186 write_escaped(f, s)?;
187 f.write_str("'")
188 }
189 Token::JsonLiteral(s) => write_escaped(f, s),
190 // Numeric tokens come straight from the lexer; their
191 // canonical Display form is bounded ASCII, but the lexer
192 // builds them via `to_string` so they cannot carry control
193 // bytes. Pass through Display.
194 Token::Integer(_) | Token::Float(_) => fmt::Display::fmt(self.0, f),
195 // Static keyword / punctuation arms — fall back to the
196 // existing Display output verbatim.
197 other => fmt::Display::fmt(other, f),
198 }
199 }
200}
201
/// Writes `s` to `f` with every character passed through
/// `char::escape_debug`.
///
/// ASCII control characters come out as `\n`, `\r`, `\0`, `\t`, … (or
/// `\u{..}` when no short form exists); backslash and both quote
/// characters are backslash-escaped; printable non-ASCII characters
/// are written unchanged. Escaping is done per `char` rather than via
/// `str::escape_debug` on the whole string, so each character is
/// treated independently of its position in the string.
///
/// Returns the first formatter error encountered, if any.
fn write_escaped(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result {
    s.chars()
        .flat_map(char::escape_debug)
        .try_for_each(|escaped| f.write_char(escaped))
}
214
215impl From<LexerError> for ParseError {
216 fn from(e: LexerError) -> Self {
217 let kind = match &e.limit_hit {
218 Some(LexerLimitHit::IdentifierTooLong { limit_name, value }) => {
219 ParseErrorKind::IdentifierTooLong {
220 limit_name,
221 value: *value,
222 }
223 }
224 None => ParseErrorKind::Syntax,
225 };
226 ParseError {
227 message: e.message,
228 position: e.position,
229 expected: Vec::new(),
230 kind,
231 }
232 }
233}