Skip to main content

reddb_rql/
limits.rs

1//! Front-end DoS limits.
2//!
3//! These limits are uniformly applied at parser entry points so a
4//! malicious query string can't exhaust recursion stack, RAM, or
5//! identifier bookkeeping. Limit values are documented in
6//! `docs/security/parser-limits.md` (issue #87).
7//!
8//! # Defaults
9//!
10//! | Limit                 | Default | Rationale                                       |
11//! |-----------------------|---------|-------------------------------------------------|
12//! | `max_depth`           | 32      | Recursive descent + Pratt; above typical        |
13//! |                       |         | hand-written queries (≤ 12).                     |
14//! | `max_input_bytes`     | 1 MiB   | Hard cap on the token stream input.              |
15//! | `max_identifier_chars`| 256     | Long enough for legitimate UUID-tagged names,    |
16//! |                       |         | short enough to bound HashMap pressure.          |
17//! | `max_tokens`          | 8192    | Bounds token-driven parser work even when input  |
18//! |                       |         | bytes and recursion depth stay below their caps. |
19//!
//! `ParserLimits` is consumed both by the [`crate::lexer`] (the input-byte
//! cap, checked at lexer entry before tokenization begins, and the
//! identifier cap, checked as each identifier token is constructed) and by
//! the parser proper (recursion-depth and token-count caps), which still
//! lives in `reddb-server` and reaches this type through its re-export shim.
24
/// Resource caps the query front-end enforces against hostile input.
///
/// Every field is `pub` so that the harness module (used by the tests in
/// `tests/support/parser_hardening`) can adjust individual caps in place.
/// The [`Default`] implementation carries the production values.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ParserLimits {
    /// Upper bound on recursion depth, counted across all recursive
    /// descent points (expressions, parenthesised sub-queries, JOIN
    /// chains).
    pub max_depth: usize,

    /// Upper bound on the input length, measured in bytes. Checked at
    /// the lexer entry, before any tokenization takes place.
    pub max_input_bytes: usize,

    /// Upper bound on the length of a single identifier, measured in
    /// characters. Checked in the lexer when an identifier token is
    /// constructed.
    pub max_identifier_chars: usize,

    /// Upper bound on how many tokens the parser may consume. Guards
    /// against flat adversarial inputs (for example long operator
    /// chains) that stay under the byte, identifier, and
    /// recursion-depth caps.
    pub max_tokens: usize,
}
46
47impl Default for ParserLimits {
48    fn default() -> Self {
49        Self {
50            max_depth: 32,
51            max_input_bytes: 1024 * 1024, // 1 MiB
52            max_identifier_chars: 256,
53            max_tokens: 8192,
54        }
55    }
56}
57
58impl ParserLimits {
59    /// Permissive limits for tests that intentionally probe deep
60    /// nesting or long inputs without tripping DoS guards.
61    pub fn permissive() -> Self {
62        Self {
63            max_depth: 1024,
64            max_input_bytes: 16 * 1024 * 1024,
65            max_identifier_chars: 4096,
66            max_tokens: 65_536,
67        }
68    }
69}
70
/// Deepest nesting allowed for a JSON object literal.
///
/// The cap is enforced after parsing by
/// [`crate::parser::dml::json_literal_depth_check`], which walks the
/// literal with an iterative stack rather than recursing.
///
/// The constant lives in this module, next to [`ParserLimits`] and
/// [`DepthCounter`], so that all depth-cap constants are found in one
/// place. Expression and subquery nesting are guarded separately: they
/// are checked inline by [`crate::parser::Parser::enter_depth`] /
/// [`crate::parser::Parser::exit_depth`] against
/// [`ParserLimits::max_depth`].
pub const JSON_LITERAL_MAX_DEPTH: u32 = 128;