Skip to main content

reddb_server/storage/query/parser/
error.rs

1//! Parser error types
2
3use std::fmt::{self, Write};
4
5use super::super::lexer::{LexerError, LexerLimitHit, Position, Token};
6
/// Error value produced by the query parser.
///
/// Bundles a human-readable message, the source position the error is
/// attributed to, an optional list of expected tokens, and a structured
/// [`ParseErrorKind`]. The `Display` impl in this file renders it as
/// `Parse error at <position>: <message>`, followed by
/// ` (expected: a, b, …)` when `expected` is non-empty.
#[derive(Debug, Clone)]
pub struct ParseError {
    /// Human-readable error message.
    pub message: String,
    /// Position where the error occurred.
    pub position: Position,
    /// Expected tokens (for better error messages); empty when the
    /// constructor had no expectation list to record.
    pub expected: Vec<String>,
    /// Structured kind for hardening / DoS errors. Always present: plain
    /// grammar failures carry [`ParseErrorKind::Syntax`].
    pub kind: ParseErrorKind,
}
19
/// Categorical kind for a parse error.
///
/// Most parse errors are plain `Syntax` failures; the variants
/// below carry structured information for the parser-hardening
/// layer (issue #87) so callers can distinguish DoS-style refusals
/// from grammar errors without string matching.
///
/// For the three limit variants, the `ParseError` constructors in this
/// file render the payload into the message as `limit_name = value`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseErrorKind {
    /// Generic syntax / semantic error.
    Syntax,
    /// Recursion-depth limit exceeded during parsing.
    DepthLimit {
        /// Name of the limit that was hit.
        limit_name: &'static str,
        /// Numeric value reported together with `limit_name`.
        value: usize,
    },
    /// Input larger than the configured byte cap.
    InputTooLarge {
        /// Name of the limit that was hit.
        limit_name: &'static str,
        /// Numeric value reported together with `limit_name` (the message
        /// labels it as bytes).
        value: usize,
    },
    /// Identifier longer than the configured character cap.
    IdentifierTooLong {
        /// Name of the limit that was hit.
        limit_name: &'static str,
        /// Numeric value reported together with `limit_name` (the message
        /// labels it as chars).
        value: usize,
    },
    /// A literal value (integer / float) parsed cleanly but lies
    /// outside the semantic range expected for its slot — e.g.
    /// `MAX_SIZE 0`, `lat = 91.0`, `K = 0`, or a negative integer
    /// where a positive one is required. The structured payload lets
    /// the snapshot/property harness distinguish these from generic
    /// syntax errors without string matching.
    ValueOutOfRange {
        /// Stable slot name, e.g. `"MAX_SIZE"`, `"lat"`, `"radius"`.
        field: &'static str,
        /// Free-text constraint, e.g. `"must be > 0"`,
        /// `"must be in -90.0..=90.0"`.
        constraint: &'static str,
    },
}
59
60impl ParseError {
61    /// Create a new parse error
62    pub fn new(message: impl Into<String>, position: Position) -> Self {
63        Self {
64            message: message.into(),
65            position,
66            expected: Vec::new(),
67            kind: ParseErrorKind::Syntax,
68        }
69    }
70
71    /// Create error with expected tokens
72    ///
73    /// `found` is rendered through [`SafeTokenDisplay`] so caller-controlled
74    /// bytes inside `Token::Ident` / `Token::String` / `Token::JsonLiteral` /
75    /// `Token::Float` / `Token::Integer` payloads are escaped via Rust's
76    /// `escape_debug` rules (CR / LF / NUL / quote bytes become `\n`,
77    /// `\r`, `\0`, `\"`, …). Static keyword and punctuation arms keep their
78    /// existing UPPER-CASE rendering so error messages and snapshot tests
79    /// stay readable. This prevents F-05 smuggling through the downstream
80    /// JSON / audit / log / gRPC sinks that embed `ParseError::message`.
81    pub fn expected(expected: Vec<&str>, found: &Token, position: Position) -> Self {
82        Self {
83            message: format!("Unexpected token: {}", SafeTokenDisplay(found)),
84            position,
85            expected: expected.into_iter().map(|s| s.to_string()).collect(),
86            kind: ParseErrorKind::Syntax,
87        }
88    }
89
90    /// Recursion depth limit hit. The structured `kind` carries the
91    /// name + numeric value so the snapshot/property harness can
92    /// pattern-match without string slicing.
93    pub fn depth_limit(limit_name: &'static str, value: usize, position: Position) -> Self {
94        Self {
95            message: format!(
96                "recursion depth limit exceeded ({} = {})",
97                limit_name, value
98            ),
99            position,
100            expected: Vec::new(),
101            kind: ParseErrorKind::DepthLimit { limit_name, value },
102        }
103    }
104
105    /// Input bytes exceeded the configured cap.
106    pub fn input_too_large(limit_name: &'static str, value: usize, position: Position) -> Self {
107        Self {
108            message: format!(
109                "input exceeds maximum size ({} = {} bytes)",
110                limit_name, value
111            ),
112            position,
113            expected: Vec::new(),
114            kind: ParseErrorKind::InputTooLarge { limit_name, value },
115        }
116    }
117
118    /// Identifier exceeded the configured character cap.
119    pub fn identifier_too_long(limit_name: &'static str, value: usize, position: Position) -> Self {
120        Self {
121            message: format!(
122                "identifier exceeds maximum length ({} = {} chars)",
123                limit_name, value
124            ),
125            position,
126            expected: Vec::new(),
127            kind: ParseErrorKind::IdentifierTooLong { limit_name, value },
128        }
129    }
130
131    /// A literal value lies outside the allowed range for its slot.
132    /// The free-text `constraint` is included verbatim in the message
133    /// so callers can render a single line without re-formatting.
134    pub fn value_out_of_range(
135        field: &'static str,
136        constraint: &'static str,
137        position: Position,
138    ) -> Self {
139        Self {
140            message: format!("{} {}", field, constraint),
141            position,
142            expected: Vec::new(),
143            kind: ParseErrorKind::ValueOutOfRange { field, constraint },
144        }
145    }
146}
147
148impl fmt::Display for ParseError {
149    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
150        write!(f, "Parse error at {}: {}", self.position, self.message)?;
151        if !self.expected.is_empty() {
152            write!(f, " (expected: {})", self.expected.join(", "))?;
153        }
154        Ok(())
155    }
156}
157
// Marker impl: `ParseError` already derives `Debug` and implements
// `Display` in this file, so every `Error` method keeps its default.
impl std::error::Error for ParseError {}
159
/// `Display` adapter that emits a `Token` while escaping the
/// caller-controlled text payload of the `Ident` / `String` /
/// `JsonLiteral` arms.
///
/// F-05 (serialization-boundary audit, 2026-05-06): SQL parser error
/// messages flow into JSON HTTP bodies, JSONL audit rows, gRPC
/// `Status::message`, PG3 `ErrorResponse`, and `tracing::warn!` log
/// lines. The default `Token` Display arms emit raw user bytes for
/// `Token::Ident("foo\nbar")` etc., which lets a tenant smuggle CR /
/// LF / NUL / quote bytes through every downstream sink at once.
///
/// This adapter renders those three arms by applying
/// `char::escape_debug` to every character of the payload; `String`
/// payloads are additionally wrapped in single quotes. `Integer` /
/// `Float` tokens and all keyword / punctuation arms are forwarded to
/// `Token`'s own `Display` impl unchanged, so existing snapshot tests and
/// operator log readability are preserved.
pub struct SafeTokenDisplay<'a>(pub &'a Token);
176
177impl fmt::Display for SafeTokenDisplay<'_> {
178    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
179        match self.0 {
180            // User-controlled byte payloads. Render via `escape_debug`
181            // so embedded CR / LF / NUL / quote bytes do not reach
182            // downstream serialization sinks unescaped.
183            Token::Ident(s) => write_escaped(f, s),
184            Token::String(s) => {
185                f.write_str("'")?;
186                write_escaped(f, s)?;
187                f.write_str("'")
188            }
189            Token::JsonLiteral(s) => write_escaped(f, s),
190            // Numeric tokens come straight from the lexer; their
191            // canonical Display form is bounded ASCII, but the lexer
192            // builds them via `to_string` so they cannot carry control
193            // bytes. Pass through Display.
194            Token::Integer(_) | Token::Float(_) => fmt::Display::fmt(self.0, f),
195            // Static keyword / punctuation arms — fall back to the
196            // existing Display output verbatim.
197            other => fmt::Display::fmt(other, f),
198        }
199    }
200}
201
/// Writes `s` to `f` with every character passed through
/// `char::escape_debug`.
///
/// ASCII control characters come out as `\n`, `\r`, `\0`, `\t`, …;
/// backslash and both quote characters are backslash-escaped; printable
/// non-ASCII characters are written unchanged. Stops at, and returns, the
/// first formatter error.
fn write_escaped(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result {
    s.chars()
        .flat_map(char::escape_debug)
        .try_for_each(|piece| f.write_char(piece))
}
214
215impl From<LexerError> for ParseError {
216    fn from(e: LexerError) -> Self {
217        let kind = match &e.limit_hit {
218            Some(LexerLimitHit::IdentifierTooLong { limit_name, value }) => {
219                ParseErrorKind::IdentifierTooLong {
220                    limit_name,
221                    value: *value,
222                }
223            }
224            None => ParseErrorKind::Syntax,
225        };
226        ParseError {
227            message: e.message,
228            position: e.position,
229            expected: Vec::new(),
230            kind,
231        }
232    }
233}