reddb_server/runtime/resource_limits.rs
1//! Operator-imposed resource limits (PLAN.md Phase 4.1).
2//!
3//! Cloud-agnostic deployments need hard upper bounds enforced
4//! regardless of cgroup or namespace presence — a process that
5//! grows past its allocated capacity slot can starve every other
6//! tenant on the same host. The limits here are read once at boot
7//! from `RED_MAX_*` env vars and held in a single immutable struct
8//! consulted by the various enforcement points (write path, accept
9//! loop, query timer, batch validator).
10//!
11//! `Option<u64>` semantics: `None` means "operator did not pin a
12//! cap at boot — fall through to whatever upstream layer (cgroup,
13//! kernel `RLIMIT_*`, runtime defaults) decides". `Some(0)` is
14//! reserved as "explicitly unbounded" so an operator who sets an
15//! env var to the literal `0` can disable a default without
16//! scripting.
17
18use std::time::Duration;
19
/// Snapshot of the `RED_MAX_*` env vars read at runtime
/// construction. Held by `RuntimeInner` and accessible via
/// `RedDBRuntime::resource_limits()` so observability and
/// enforcement consult the same values.
///
/// Every field is optional: `None` means the operator pinned no cap.
/// The type compares by value (`PartialEq` / `Eq`) so two snapshots
/// can be checked for equality directly, e.g. in tests or when
/// deciding whether a reloaded configuration actually changed.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ResourceLimits {
    /// Maximum primary-database file size in bytes. Writes that
    /// would push the file past this value return
    /// `RedDBError::QuotaExceeded` with a structured payload
    /// (`{limit:"max_db_size", current, max}`). PLAN.md prescribes
    /// HTTP 507 Insufficient Storage at the public surface.
    /// `Some(0)` is treated as unbounded by `db_size_exceeded`.
    pub max_db_size_bytes: Option<u64>,

    /// Maximum concurrent client connections. Saturated accept
    /// loops return HTTP 503 / wire-protocol error so callers back
    /// off cleanly.
    pub max_connections: Option<u64>,

    /// Soft memory budget in bytes. Cache eviction fires at this
    /// threshold; the runtime never panics on OOM. `0` disables
    /// the soft cap entirely. `from_env` fills this from the
    /// `MAX_MEMORY_MB` variable (converted to bytes) and otherwise
    /// from the detected cgroup memory cap.
    pub max_memory_bytes: Option<u64>,

    /// Maximum queries-per-second sustained per-instance. Token
    /// bucket fires HTTP 429 / wire backoff on excess.
    pub max_qps: Option<u64>,

    /// Maximum wall time for any single query. Queries past this
    /// threshold are killed and return HTTP 504. `None` defers to
    /// the OS / cgroup CPU policy.
    pub max_query_duration: Option<Duration>,

    /// Maximum response payload size in bytes. Larger responses
    /// are truncated or errored (decided by the surface).
    pub max_result_bytes: Option<u64>,

    /// Maximum rows per bulk insert / update / delete. Caps the
    /// memory the server allocates for any one batch. `Some(0)` is
    /// treated as unbounded by `batch_size_exceeded`.
    pub max_batch_size: Option<u64>,
}
61
62impl ResourceLimits {
63 /// Read limits from env vars. Accepts both the cloud-agnostic
64 /// `RED_MAX_*` family (PLAN.md spec) and the legacy `REDDB_MAX_*`
65 /// form for existing dev installs. Missing or unparseable
66 /// values stay `None`. `0` is treated as "explicitly
67 /// unbounded" so operators can disable a deployment-default
68 /// cap without unsetting the env.
69 pub fn from_env() -> Self {
70 let mut out = Self {
71 max_db_size_bytes: Self::read_u64("MAX_DB_SIZE_BYTES"),
72 max_connections: Self::read_u64("MAX_CONNECTIONS"),
73 max_memory_bytes: Self::read_u64("MAX_MEMORY_MB")
74 .map(|mb| mb.saturating_mul(1_048_576)),
75 max_qps: Self::read_u64("MAX_QPS"),
76 max_query_duration: Self::read_u64("MAX_QUERY_DURATION_MS").map(Duration::from_millis),
77 max_result_bytes: Self::read_u64("MAX_RESULT_BYTES"),
78 max_batch_size: Self::read_u64("MAX_BATCH_SIZE"),
79 };
80
81 // PLAN.md Phase 4.2 — auto-detect container memory cap when
82 // the operator didn't pin one. Cgroup v2 first
83 // (`memory.max`), v1 fallback
84 // (`memory/memory.limit_in_bytes`). Cross-platform: missing
85 // files / non-Linux just leave the field `None`. The
86 // explicit env var still wins so an operator can override
87 // a too-tight cgroup detect without restructuring the
88 // container.
89 if out.max_memory_bytes.is_none() {
90 out.max_memory_bytes = read_cgroup_memory_max();
91 }
92
93 out
94 }
95
96 fn read_u64(suffix: &str) -> Option<u64> {
97 std::env::var(format!("RED_{suffix}"))
98 .or_else(|_| std::env::var(format!("REDDB_{suffix}")))
99 .ok()
100 .and_then(|raw| raw.trim().parse::<u64>().ok())
101 }
102
103 /// Whether `max_db_size_bytes` is set and `current_bytes`
104 /// exceeds it. Cheap branch — caller decides what to do
105 /// (surface-specific error code, refuse new writes, suspend).
106 pub fn db_size_exceeded(&self, current_bytes: u64) -> bool {
107 match self.max_db_size_bytes {
108 Some(limit) if limit > 0 => current_bytes > limit,
109 _ => false,
110 }
111 }
112
113 pub fn batch_size_exceeded(&self, requested: usize) -> bool {
114 match self.max_batch_size {
115 Some(limit) if limit > 0 => (requested as u64) > limit,
116 _ => false,
117 }
118 }
119
120 /// Issue #205 — disk-headroom monitor. When `available_bytes` drops
121 /// below `threshold_bytes`, emit a `DiskSpaceCritical` operator
122 /// event. Returns whether the threshold was breached so callers
123 /// can also fail the path that triggered the check. Cheap to call
124 /// per-write: the threshold check is one branch and the emit only
125 /// runs on breach.
126 pub fn check_disk_headroom(
127 &self,
128 path: &str,
129 available_bytes: u64,
130 threshold_bytes: u64,
131 ) -> bool {
132 if threshold_bytes > 0 && available_bytes < threshold_bytes {
133 crate::telemetry::operator_event::OperatorEvent::DiskSpaceCritical {
134 path: path.to_string(),
135 available_bytes,
136 threshold_bytes,
137 }
138 .emit_global();
139 true
140 } else {
141 false
142 }
143 }
144}
145
/// Interpret the contents of a cgroup memory-limit file.
///
/// Returns the limit in bytes, or `None` when the text is not a
/// plain `u64` (this covers cgroup v2's literal `max` and empty
/// files), is `0`, or is so large that it can only be an
/// "unlimited" sentinel.
fn parse_cgroup_memory_limit(raw: &str) -> Option<u64> {
    // cgroup v1 reports "no limit" as `i64::MAX` rounded down to the
    // page size — `9223372036854771712` with 4 KiB pages, slightly
    // less with larger pages. Those values sit just *below*
    // `u64::MAX / 2`, so comparing against that bound lets the
    // sentinel through as if it were a real cap. Anything at or
    // above 2^62 bytes (4 EiB) is treated as unbounded instead,
    // which leaves ample margin under every page-size variant of
    // the sentinel.
    const UNBOUNDED_THRESHOLD: u64 = 1 << 62;

    let bytes = raw.trim().parse::<u64>().ok()?;
    (bytes > 0 && bytes < UNBOUNDED_THRESHOLD).then_some(bytes)
}

/// Read the active cgroup memory cap, returning bytes when known.
/// Cgroup v2 first (`/sys/fs/cgroup/memory.max`), v1 fallback
/// (`/sys/fs/cgroup/memory/memory.limit_in_bytes`). The literal
/// string `max` (cgroup v2 "no cap") and the cgroup v1 "unlimited"
/// sentinel both yield `None` so the resource-limits struct stays
/// at "no cap pinned".
///
/// Non-Linux / missing files / unparseable contents → `None`. Never
/// panics; the caller treats absence as "fall through to whatever
/// upstream layer decides".
fn read_cgroup_memory_max() -> Option<u64> {
    // Probed in priority order; the first file that yields a real
    // cap wins. A v2 file that says `max` falls through to v1.
    const LIMIT_FILES: [&str; 2] = [
        "/sys/fs/cgroup/memory.max",
        "/sys/fs/cgroup/memory/memory.limit_in_bytes",
    ];

    LIMIT_FILES.iter().find_map(|path| {
        let raw = std::fs::read_to_string(path).ok()?;
        parse_cgroup_memory_limit(&raw)
    })
}
180
#[cfg(test)]
mod tests {
    use super::*;

    /// Set `key` to `value` in the process environment.
    fn env_set(key: &str, value: &str) {
        // SAFETY: NOTE(review) — tests share one process and may run
        // on parallel threads; this presumably relies on nothing
        // reading the environment outside `std::env` concurrently.
        // Confirm before adding tests that call into C code.
        unsafe { std::env::set_var(key, value) }
    }

    /// Remove `key` from the process environment.
    fn env_unset(key: &str) {
        // SAFETY: NOTE(review) — same assumption as `env_set`.
        unsafe { std::env::remove_var(key) }
    }

    /// Take a `from_env` snapshot while `key` is temporarily set to
    /// `value`; the variable is removed again before returning.
    fn limits_with(key: &str, value: &str) -> ResourceLimits {
        env_set(key, value);
        let snapshot = ResourceLimits::from_env();
        env_unset(key);
        snapshot
    }

    #[test]
    fn from_env_reads_max_db_size() {
        let snapshot = limits_with("RED_MAX_DB_SIZE_BYTES", "1073741824");
        assert_eq!(snapshot.max_db_size_bytes, Some(1_073_741_824));
    }

    #[test]
    fn legacy_reddb_prefix_is_accepted() {
        let snapshot = limits_with("REDDB_MAX_BATCH_SIZE", "10000");
        assert_eq!(snapshot.max_batch_size, Some(10_000));
    }

    #[test]
    fn unset_env_yields_no_limit() {
        for key in ["RED_MAX_QPS", "REDDB_MAX_QPS"] {
            env_unset(key);
        }
        let snapshot = ResourceLimits::from_env();
        assert!(snapshot.max_qps.is_none());
    }

    #[test]
    fn db_size_exceeded_respects_zero_as_unbounded() {
        // A cap of zero is the "explicitly unbounded" marker.
        let unbounded = ResourceLimits {
            max_db_size_bytes: Some(0),
            ..Default::default()
        };
        assert!(!unbounded.db_size_exceeded(u64::MAX));
    }

    #[test]
    fn db_size_exceeded_triggers_above_limit() {
        let capped = ResourceLimits {
            max_db_size_bytes: Some(1024),
            ..Default::default()
        };
        // Exactly at the cap is still allowed; one byte over is not.
        assert!(!capped.db_size_exceeded(1024));
        assert!(capped.db_size_exceeded(1025));
    }

    #[test]
    fn memory_mb_converts_to_bytes() {
        let snapshot = limits_with("RED_MAX_MEMORY_MB", "256");
        assert_eq!(snapshot.max_memory_bytes, Some(256 * 1024 * 1024));
    }

    #[test]
    fn query_duration_parses_to_duration() {
        let snapshot = limits_with("RED_MAX_QUERY_DURATION_MS", "30000");
        assert_eq!(snapshot.max_query_duration, Some(Duration::from_secs(30)));
    }
}
254}