Skip to main content

reddb_server/runtime/
resource_limits.rs

1//! Operator-imposed resource limits (PLAN.md Phase 4.1).
2//!
3//! Cloud-agnostic deployments need hard upper bounds enforced
4//! regardless of cgroup or namespace presence — a process that
5//! grows past its allocated capacity slot can starve every other
6//! tenant on the same host. The limits here are read once at boot
7//! from `RED_MAX_*` env vars and held in a single immutable struct
8//! consulted by the various enforcement points (write path, accept
9//! loop, query timer, batch validator).
10//!
11//! `Option<u64>` semantics: `None` means "operator did not pin a
12//! cap at boot — fall through to whatever upstream layer (cgroup,
13//! kernel `RLIMIT_*`, runtime defaults) decides". `Some(0)` is
14//! reserved as "explicitly unbounded" so an operator who sets an
15//! env var to the literal `0` can disable a default without
16//! scripting.
17
18use std::time::Duration;
19
/// Immutable snapshot of the `RED_MAX_*` env vars, taken once at
/// runtime construction. Held by `RuntimeInner` and exposed via
/// `RedDBRuntime::resource_limits()` so observability and enforcement
/// consult the same values.
///
/// Field semantics (see module docs): `None` = no cap pinned at boot,
/// defer to upstream layers; `Some(0)` = explicitly unbounded.
#[derive(Debug, Clone, Default)]
pub struct ResourceLimits {
    /// Maximum primary-database file size in bytes. Writes that would
    /// push the file past this value return
    /// `RedDBError::QuotaExceeded` with a structured payload
    /// (`{limit:"max_db_size", current, max}`). PLAN.md prescribes
    /// HTTP 507 Insufficient Storage at the public surface.
    pub max_db_size_bytes: Option<u64>,

    /// Maximum concurrent client connections. A saturated accept loop
    /// returns HTTP 503 / a wire-protocol error so callers back off
    /// cleanly.
    pub max_connections: Option<u64>,

    /// Soft memory budget in bytes. Cache eviction fires at this
    /// threshold; the runtime never panics on OOM. `0` disables the
    /// soft cap entirely.
    pub max_memory_bytes: Option<u64>,

    /// Maximum sustained queries-per-second per instance. A token
    /// bucket fires HTTP 429 / wire backoff on excess.
    pub max_qps: Option<u64>,

    /// Maximum wall time for any single query. Queries past this
    /// threshold are killed and return HTTP 504. `None` defers to
    /// the OS / cgroup CPU policy.
    pub max_query_duration: Option<Duration>,

    /// Maximum response payload size in bytes. Larger responses are
    /// truncated or errored (decided by the surface).
    pub max_result_bytes: Option<u64>,

    /// Maximum rows per bulk insert / update / delete. Caps the
    /// memory the server allocates for any one batch.
    pub max_batch_size: Option<u64>,
}
61
62impl ResourceLimits {
63    /// Read limits from env vars. Accepts both the cloud-agnostic
64    /// `RED_MAX_*` family (PLAN.md spec) and the legacy `REDDB_MAX_*`
65    /// form for existing dev installs. Missing or unparseable
66    /// values stay `None`. `0` is treated as "explicitly
67    /// unbounded" so operators can disable a deployment-default
68    /// cap without unsetting the env.
69    pub fn from_env() -> Self {
70        let mut out = Self {
71            max_db_size_bytes: Self::read_u64("MAX_DB_SIZE_BYTES"),
72            max_connections: Self::read_u64("MAX_CONNECTIONS"),
73            max_memory_bytes: Self::read_u64("MAX_MEMORY_MB")
74                .map(|mb| mb.saturating_mul(1_048_576)),
75            max_qps: Self::read_u64("MAX_QPS"),
76            max_query_duration: Self::read_u64("MAX_QUERY_DURATION_MS").map(Duration::from_millis),
77            max_result_bytes: Self::read_u64("MAX_RESULT_BYTES"),
78            max_batch_size: Self::read_u64("MAX_BATCH_SIZE"),
79        };
80
81        // PLAN.md Phase 4.2 — auto-detect container memory cap when
82        // the operator didn't pin one. Cgroup v2 first
83        // (`memory.max`), v1 fallback
84        // (`memory/memory.limit_in_bytes`). Cross-platform: missing
85        // files / non-Linux just leave the field `None`. The
86        // explicit env var still wins so an operator can override
87        // a too-tight cgroup detect without restructuring the
88        // container.
89        if out.max_memory_bytes.is_none() {
90            out.max_memory_bytes = read_cgroup_memory_max();
91        }
92
93        out
94    }
95
96    fn read_u64(suffix: &str) -> Option<u64> {
97        std::env::var(format!("RED_{suffix}"))
98            .or_else(|_| std::env::var(format!("REDDB_{suffix}")))
99            .ok()
100            .and_then(|raw| raw.trim().parse::<u64>().ok())
101    }
102
103    /// Whether `max_db_size_bytes` is set and `current_bytes`
104    /// exceeds it. Cheap branch — caller decides what to do
105    /// (surface-specific error code, refuse new writes, suspend).
106    pub fn db_size_exceeded(&self, current_bytes: u64) -> bool {
107        match self.max_db_size_bytes {
108            Some(limit) if limit > 0 => current_bytes > limit,
109            _ => false,
110        }
111    }
112
113    pub fn batch_size_exceeded(&self, requested: usize) -> bool {
114        match self.max_batch_size {
115            Some(limit) if limit > 0 => (requested as u64) > limit,
116            _ => false,
117        }
118    }
119
120    /// Issue #205 — disk-headroom monitor. When `available_bytes` drops
121    /// below `threshold_bytes`, emit a `DiskSpaceCritical` operator
122    /// event. Returns whether the threshold was breached so callers
123    /// can also fail the path that triggered the check. Cheap to call
124    /// per-write: the threshold check is one branch and the emit only
125    /// runs on breach.
126    pub fn check_disk_headroom(
127        &self,
128        path: &str,
129        available_bytes: u64,
130        threshold_bytes: u64,
131    ) -> bool {
132        if threshold_bytes > 0 && available_bytes < threshold_bytes {
133            crate::telemetry::operator_event::OperatorEvent::DiskSpaceCritical {
134                path: path.to_string(),
135                available_bytes,
136                threshold_bytes,
137            }
138            .emit_global();
139            true
140        } else {
141            false
142        }
143    }
144}
145
/// Read the active cgroup memory cap, returning bytes when known.
///
/// Tries cgroup v2 (`/sys/fs/cgroup/memory.max`) first, then the v1
/// fallback (`/sys/fs/cgroup/memory/memory.limit_in_bytes`). The v2
/// literal `max` ("no cap") yields `None` so the resource-limits
/// struct stays at "no cap pinned". Values of `0` or of effectively
/// unbounded magnitude (cgroup v1 kernels report
/// `9223372036854771712` as "unlimited") are likewise rejected.
///
/// Non-Linux hosts, missing files, and unparseable contents all
/// produce `None`; this never panics. The caller treats absence as
/// "fall through to whatever upstream layer decides".
fn read_cgroup_memory_max() -> Option<u64> {
    // Shared plausibility filter: must parse, be positive, and not be
    // an "unlimited" sentinel near u64::MAX.
    let plausible = |raw: String| -> Option<u64> {
        let bytes: u64 = raw.trim().parse().ok()?;
        (bytes > 0 && bytes < u64::MAX / 2).then_some(bytes)
    };

    // cgroup v2: the literal `max` means uncapped, so skip it and let
    // the v1 path (if any) have a look; empty contents fail the parse
    // inside `plausible` and fall through the same way.
    std::fs::read_to_string("/sys/fs/cgroup/memory.max")
        .ok()
        .filter(|raw| raw.trim() != "max")
        .and_then(plausible)
        .or_else(|| {
            std::fs::read_to_string("/sys/fs/cgroup/memory/memory.limit_in_bytes")
                .ok()
                .and_then(plausible)
        })
}
180
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::{Mutex, MutexGuard, OnceLock};

    /// Serialises every test that touches process-global env vars.
    ///
    /// `std::env::set_var` / `remove_var` are `unsafe` (edition 2024)
    /// precisely because mutating the environment while another thread
    /// reads it is a data race — and the default test harness runs
    /// `#[test]` fns on parallel threads. Each env-touching test below
    /// holds this guard for its whole body so its set → read → unset
    /// sequence is atomic with respect to the other env tests.
    fn env_guard() -> MutexGuard<'static, ()> {
        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
        LOCK.get_or_init(|| Mutex::new(()))
            .lock()
            // A poisoned lock only means another test panicked while
            // holding it; the environment itself is still usable.
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    fn env_set(key: &str, value: &str) {
        // SAFETY: every caller holds `env_guard()`, so no other thread
        // in this module concurrently reads or writes the environment.
        unsafe {
            std::env::set_var(key, value);
        }
    }
    fn env_unset(key: &str) {
        // SAFETY: see `env_set` — callers hold `env_guard()`.
        unsafe {
            std::env::remove_var(key);
        }
    }

    #[test]
    fn from_env_reads_max_db_size() {
        let _guard = env_guard();
        env_set("RED_MAX_DB_SIZE_BYTES", "1073741824");
        let limits = ResourceLimits::from_env();
        env_unset("RED_MAX_DB_SIZE_BYTES");
        assert_eq!(limits.max_db_size_bytes, Some(1_073_741_824));
    }

    #[test]
    fn legacy_reddb_prefix_is_accepted() {
        let _guard = env_guard();
        env_set("REDDB_MAX_BATCH_SIZE", "10000");
        let limits = ResourceLimits::from_env();
        env_unset("REDDB_MAX_BATCH_SIZE");
        assert_eq!(limits.max_batch_size, Some(10_000));
    }

    #[test]
    fn unset_env_yields_no_limit() {
        let _guard = env_guard();
        env_unset("RED_MAX_QPS");
        env_unset("REDDB_MAX_QPS");
        let limits = ResourceLimits::from_env();
        assert!(limits.max_qps.is_none());
    }

    #[test]
    fn db_size_exceeded_respects_zero_as_unbounded() {
        // Pure computation — no env access, so no guard needed.
        let limits = ResourceLimits {
            max_db_size_bytes: Some(0),
            ..Default::default()
        };
        assert!(!limits.db_size_exceeded(u64::MAX));
    }

    #[test]
    fn db_size_exceeded_triggers_above_limit() {
        let limits = ResourceLimits {
            max_db_size_bytes: Some(1024),
            ..Default::default()
        };
        // Limit is inclusive: exactly at the cap is still allowed.
        assert!(!limits.db_size_exceeded(1024));
        assert!(limits.db_size_exceeded(1025));
    }

    #[test]
    fn memory_mb_converts_to_bytes() {
        let _guard = env_guard();
        env_set("RED_MAX_MEMORY_MB", "256");
        let limits = ResourceLimits::from_env();
        env_unset("RED_MAX_MEMORY_MB");
        assert_eq!(limits.max_memory_bytes, Some(256 * 1024 * 1024));
    }

    #[test]
    fn query_duration_parses_to_duration() {
        let _guard = env_guard();
        env_set("RED_MAX_QUERY_DURATION_MS", "30000");
        let limits = ResourceLimits::from_env();
        env_unset("RED_MAX_QUERY_DURATION_MS");
        assert_eq!(limits.max_query_duration, Some(Duration::from_secs(30)));
    }
}