Skip to main content

reddb_server/runtime/
config_matrix.rs

1//! Performance / operational config matrix.
2//!
3//! Two tiers:
4//!
5//! - **Tier A (`Critical`)** — self-healing on boot. If the key is
6//!   missing from `red_config`, the loader writes the default in.
7//!   Operators always see these via `SHOW CONFIG` so they know what
8//!   guarantees and tuning they have.
9//! - **Tier B (`Optional`)** — in-memory default. Never self-populated.
10//!   Appears in `SHOW CONFIG` only after an explicit `SET CONFIG`.
11//!
//! The matrix is the single source of truth for perf / durability /
//! concurrency / storage keys introduced by the perf-parity push.
//! It intentionally does **not** cover the pre-existing `red.*`
//! trees (ai, server, storage, search, etc.) — those have their own
//! lifecycle in `impl_core`. Keys here live under the `ask.*`,
//! `ai.ner.*`, `cache.*`, `concurrency.*`, `durability.*`,
//! `runtime.*`, and `storage.*` namespaces.
18
19use crate::serde_json::Value as JsonValue;
20use crate::storage::UnifiedStore;
21
22#[inline]
23fn num(v: f64) -> JsonValue {
24    JsonValue::Number(v)
25}
26
27#[inline]
28fn text(s: &str) -> JsonValue {
29    JsonValue::String(s.to_string())
30}
31
32/// Default value encoded as JSON so the loader can delegate to
33/// `set_config_tree` which already handles every `Value` variant.
34#[derive(Debug, Clone)]
35pub struct ConfigDefault {
36    pub key: &'static str,
37    pub tier: Tier,
38    /// Lazily produced JSON default. A closure because `bgwriter.delay_ms`
39    /// etc. are unsigned and `serde_json::Value::from(u64)` is fine, but
40    /// we want the option of composing richer defaults later.
41    pub default: fn() -> JsonValue,
42}
43
/// Lifecycle tier of a matrix key.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Tier {
    /// Tier A: written into `red_config` on boot when missing, so it is
    /// always visible in `SHOW CONFIG`.
    Critical,
    /// Tier B: in-memory default only. Shows up in `SHOW CONFIG` only
    /// after a user has written it.
    Optional,
}
51
52/// The full matrix. Keep sorted by namespace for readability.
53pub const MATRIX: &[ConfigDefault] = &[
54    // ask.*
55    ConfigDefault {
56        key: "ask.max_prompt_tokens",
57        tier: Tier::Optional,
58        default: || num(8192.0),
59    },
60    ConfigDefault {
61        key: "ask.max_completion_tokens",
62        tier: Tier::Optional,
63        default: || num(1024.0),
64    },
65    ConfigDefault {
66        key: "ask.max_sources_bytes",
67        tier: Tier::Optional,
68        default: || num(262_144.0),
69    },
70    ConfigDefault {
71        key: "ask.timeout_ms",
72        tier: Tier::Optional,
73        default: || num(30_000.0),
74    },
75    ConfigDefault {
76        key: "ask.daily_cost_cap_usd",
77        tier: Tier::Optional,
78        default: || text(""),
79    },
80    ConfigDefault {
81        key: "ask.audit.include_answer",
82        tier: Tier::Optional,
83        default: || JsonValue::Bool(false),
84    },
85    ConfigDefault {
86        key: "ask.audit.retention_days",
87        tier: Tier::Optional,
88        default: || num(90.0),
89    },
90    ConfigDefault {
91        key: "ask.cache.enabled",
92        tier: Tier::Optional,
93        default: || JsonValue::Bool(false),
94    },
95    ConfigDefault {
96        key: "ask.cache.default_ttl",
97        tier: Tier::Optional,
98        default: || text(""),
99    },
100    ConfigDefault {
101        key: "ask.cache.max_entries",
102        tier: Tier::Optional,
103        default: || num(1024.0),
104    },
105    // cache.blob.*
106    ConfigDefault {
107        key: "cache.blob.l1_bytes_max",
108        tier: Tier::Critical,
109        default: || num(crate::storage::cache::DEFAULT_BLOB_L1_BYTES_MAX as f64),
110    },
111    ConfigDefault {
112        key: "cache.blob.l2_bytes_max",
113        tier: Tier::Critical,
114        default: || num(crate::storage::cache::DEFAULT_BLOB_L2_BYTES_MAX as f64),
115    },
116    ConfigDefault {
117        key: "cache.blob.max_namespaces",
118        tier: Tier::Critical,
119        default: || num(crate::storage::cache::DEFAULT_BLOB_MAX_NAMESPACES as f64),
120    },
121    // durability.*
122    ConfigDefault {
123        key: "durability.mode",
124        tier: Tier::Critical,
125        default: || text("sync"),
126    },
127    // runtime.result_cache.*
128    ConfigDefault {
129        key: "runtime.result_cache.backend",
130        tier: Tier::Critical,
131        default: || text("legacy"),
132    },
133    // Kill-switch (issue #802). Critical so it self-heals to `true` on
134    // boot and is always visible in SHOW CONFIG — operators flip it to
135    // `false` to disable result caching wholesale for debugging.
136    ConfigDefault {
137        key: "runtime.result_cache.enabled",
138        tier: Tier::Critical,
139        default: || JsonValue::Bool(true),
140    },
141    // Per-entry freshness window in seconds (issue #802). Mirrors the
142    // former `RESULT_CACHE_TTL_SECS` constant.
143    ConfigDefault {
144        key: "runtime.result_cache.ttl_seconds",
145        tier: Tier::Optional,
146        default: || num(30.0),
147    },
148    // LRU capacity in entries (issue #802). Mirrors the former
149    // `RESULT_CACHE_MAX_ENTRIES` constant.
150    ConfigDefault {
151        key: "runtime.result_cache.capacity_entries",
152        tier: Tier::Optional,
153        default: || num(1000.0),
154    },
155    // concurrency.*
156    ConfigDefault {
157        key: "concurrency.locking.enabled",
158        tier: Tier::Critical,
159        default: || JsonValue::Bool(true),
160    },
161    ConfigDefault {
162        key: "concurrency.locking.deadlock_timeout_ms",
163        tier: Tier::Optional,
164        default: || num(5000.0),
165    },
166    // storage.wal.*
167    ConfigDefault {
168        key: "storage.wal.max_interval_ms",
169        tier: Tier::Critical,
170        default: || num(10.0),
171    },
172    ConfigDefault {
173        key: "storage.wal.min_batch_size",
174        tier: Tier::Optional,
175        default: || num(4.0),
176    },
177    // storage.deploy.* — official deploy/storage profile selection.
178    ConfigDefault {
179        key: "storage.deploy.profile",
180        tier: Tier::Critical,
181        default: || text("embedded"),
182    },
183    ConfigDefault {
184        key: "storage.deploy.packaging",
185        tier: Tier::Critical,
186        default: || text("single-file"),
187    },
188    ConfigDefault {
189        key: "storage.deploy.preset",
190        tier: Tier::Critical,
191        default: || text("embedded"),
192    },
193    ConfigDefault {
194        key: "storage.deploy.replica_count",
195        tier: Tier::Critical,
196        default: || num(0.0),
197    },
198    ConfigDefault {
199        key: "storage.deploy.managed_backup",
200        tier: Tier::Critical,
201        default: || JsonValue::Bool(false),
202    },
203    ConfigDefault {
204        key: "storage.deploy.wal_retention",
205        tier: Tier::Critical,
206        default: || JsonValue::Bool(false),
207    },
208    // storage.bgwriter.*
209    ConfigDefault {
210        key: "storage.bgwriter.delay_ms",
211        tier: Tier::Critical,
212        default: || num(200.0),
213    },
214    ConfigDefault {
215        key: "storage.bgwriter.max_pages_per_round",
216        tier: Tier::Optional,
217        default: || num(100.0),
218    },
219    ConfigDefault {
220        key: "storage.bgwriter.lru_multiplier",
221        tier: Tier::Optional,
222        default: || num(2.0),
223    },
224    // storage.bulk_insert.*
225    ConfigDefault {
226        key: "storage.bulk_insert.max_buffered_rows",
227        tier: Tier::Optional,
228        default: || num(1000.0),
229    },
230    ConfigDefault {
231        key: "storage.bulk_insert.max_buffered_bytes",
232        tier: Tier::Optional,
233        default: || num(65536.0),
234    },
235    // storage.hot_update.*
236    ConfigDefault {
237        key: "storage.hot_update.max_chain_hops",
238        tier: Tier::Optional,
239        default: || num(32.0),
240    },
241    // storage.btree.*
242    ConfigDefault {
243        key: "storage.btree.lehman_yao",
244        tier: Tier::Critical,
245        default: || JsonValue::Bool(true),
246    },
247    // ai.ner.* — opt-in LLM backend for AskPipeline Stage 1 (issue #189).
248    // Default backend stays heuristic so existing deployments keep
249    // their current behaviour without operator action.
250    ConfigDefault {
251        key: "ai.ner.backend",
252        tier: Tier::Optional,
253        default: || text("heuristic"),
254    },
255    ConfigDefault {
256        key: "ai.ner.endpoint",
257        tier: Tier::Optional,
258        default: || text(""),
259    },
260    ConfigDefault {
261        key: "ai.ner.model",
262        tier: Tier::Optional,
263        default: || text(""),
264    },
265    ConfigDefault {
266        key: "ai.ner.timeout_ms",
267        tier: Tier::Optional,
268        default: || num(5000.0),
269    },
270    ConfigDefault {
271        key: "ai.ner.fallback",
272        tier: Tier::Optional,
273        default: || text("use_heuristic"),
274    },
275    // runtime.ai.transport.* — shared outbound AI HTTP client foundation
276    // (issue #274). Provider rewiring can opt into these defaults
277    // incrementally.
278    ConfigDefault {
279        key: "runtime.ai.transport_pool_size",
280        tier: Tier::Optional,
281        default: || num(16.0),
282    },
283    ConfigDefault {
284        key: "runtime.ai.transport_timeout_ms",
285        tier: Tier::Optional,
286        default: || num(30000.0),
287    },
288    ConfigDefault {
289        key: "runtime.ai.transport_retry_max_attempts",
290        tier: Tier::Optional,
291        default: || num(3.0),
292    },
293    ConfigDefault {
294        key: "runtime.ai.transport_retry_base_ms",
295        tier: Tier::Optional,
296        default: || num(500.0),
297    },
298    // cache.blob.policy.* — extended TTL hot-path opt-in (issue #189).
299    ConfigDefault {
300        key: "cache.blob.policy.extended",
301        tier: Tier::Optional,
302        default: || text("off"),
303    },
304    // cache.blob.async_promotion — async L2->L1 promotion pool opt-in
305    // (issue #193). When "on", L2 hits return bytes to the caller
306    // immediately and the L1 install runs on a background worker.
307    // Default "off" for safe rollout — legacy synchronous promotion path.
308    ConfigDefault {
309        key: "cache.blob.async_promotion",
310        tier: Tier::Optional,
311        default: || text("off"),
312    },
313];
314
315/// Fetch the JSON default for a matrix key. Returns `None` when the
316/// key is not in the matrix (callers should treat that as a
317/// programming error — unknown key, unknown tier, unknown semantics).
318pub fn default_for(key: &str) -> Option<JsonValue> {
319    MATRIX
320        .iter()
321        .find(|entry| entry.key == key)
322        .map(|entry| (entry.default)())
323}
324
325/// Tier lookup — useful for tests and for introspection commands
326/// that want to report whether a key is expected to self-heal.
327pub fn tier_for(key: &str) -> Option<Tier> {
328    MATRIX
329        .iter()
330        .find(|entry| entry.key == key)
331        .map(|entry| entry.tier)
332}
333
334/// Boot-time self-healing pass: for every `Tier::Critical` key, if
335/// `red_config` does not already contain the key, write the default
336/// in. Idempotent — re-running produces no writes.
337///
338/// `Tier::Optional` keys are never touched here; they stay
339/// transparent-default until a user `SET CONFIG` elevates them.
340pub fn heal_critical_keys(store: &UnifiedStore) {
341    // `set_config_tree` dot-splits the key and stores one row per
342    // leaf, so we handle each matrix entry individually.
343    for entry in MATRIX {
344        if entry.tier != Tier::Critical {
345            continue;
346        }
347        if is_key_present(store, entry.key) {
348            continue;
349        }
350        store.set_config_tree(entry.key, &(entry.default)());
351    }
352}
353
354/// Lightweight presence probe. Avoids loading the whole red_config
355/// collection; scans until the first hit.
356fn is_key_present(store: &UnifiedStore, key: &str) -> bool {
357    let Some(manager) = store.get_collection("red_config") else {
358        return false;
359    };
360    let mut found = false;
361    manager.for_each_entity(|entity| {
362        if let Some(row) = entity.data.as_row() {
363            let entry_key = row.get_field("key").and_then(|v| match v {
364                crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
365                _ => None,
366            });
367            if entry_key == Some(key) {
368                found = true;
369                return false; // short-circuit
370            }
371        }
372        true
373    });
374    found
375}
376
#[cfg(test)]
mod tests {
    use super::*;

    /// A null default would make the boot-time self-heal write nothing
    /// useful, so every entry must produce a concrete value.
    #[test]
    fn every_matrix_entry_has_a_default_that_resolves() {
        for entry in MATRIX {
            let value = (entry.default)();
            assert!(
                !matches!(value, JsonValue::Null),
                "matrix key {} has a null default, defeats self-heal",
                entry.key
            );
        }
    }

    /// `default_for` / `tier_for` return the first match, so a duplicated
    /// key would silently shadow the later entry. Fail loudly instead.
    #[test]
    fn matrix_keys_are_unique() {
        let mut seen = std::collections::HashSet::new();
        for entry in MATRIX {
            assert!(
                seen.insert(entry.key),
                "matrix key {} is declared more than once",
                entry.key
            );
        }
    }

    #[test]
    fn critical_keys_cover_the_core_guarantees() {
        // This list is a tripwire — if someone drops one of these
        // from Tier A without updating callers, the test catches it.
        let required_critical = [
            "cache.blob.l1_bytes_max",
            "cache.blob.l2_bytes_max",
            "cache.blob.max_namespaces",
            "durability.mode",
            "runtime.result_cache.backend",
            // Kill-switch: the matrix marks it Critical so it is always
            // visible in SHOW CONFIG.
            "runtime.result_cache.enabled",
            "concurrency.locking.enabled",
            "storage.wal.max_interval_ms",
            "storage.deploy.profile",
            "storage.deploy.packaging",
            "storage.deploy.preset",
            "storage.deploy.replica_count",
            "storage.deploy.managed_backup",
            "storage.deploy.wal_retention",
            "storage.bgwriter.delay_ms",
            "storage.btree.lehman_yao",
        ];
        for key in required_critical {
            assert_eq!(
                tier_for(key),
                Some(Tier::Critical),
                "{key} must be a Tier A (Critical) key",
            );
        }
    }

    #[test]
    fn optional_keys_are_not_self_healed() {
        let must_be_optional = [
            "concurrency.locking.deadlock_timeout_ms",
            "storage.wal.min_batch_size",
            "storage.bgwriter.max_pages_per_round",
            "storage.bgwriter.lru_multiplier",
            "storage.bulk_insert.max_buffered_rows",
            "storage.bulk_insert.max_buffered_bytes",
            "storage.hot_update.max_chain_hops",
        ];
        for key in must_be_optional {
            assert_eq!(tier_for(key), Some(Tier::Optional), "{key} tier mismatch");
        }
    }

    #[test]
    fn unknown_key_returns_none() {
        assert!(default_for("nonexistent.key").is_none());
        assert!(tier_for("nonexistent.key").is_none());
    }
}