Skip to main content

reddb_server/runtime/
config_matrix.rs

//! Performance / operational config matrix.
//!
//! Two tiers:
//!
//! - **Tier A (`Critical`)** — self-healing on boot. If the key is
//!   missing from `red_config`, the loader writes the default in.
//!   Operators always see these via `SHOW CONFIG` so they know what
//!   guarantees and tuning they have.
//! - **Tier B (`Optional`)** — in-memory default. Never self-populated.
//!   Appears in `SHOW CONFIG` only after an explicit `SET CONFIG`.
//!
//! The matrix is the single source of truth for perf / durability /
//! concurrency / storage keys introduced by the perf-parity push.
//! It intentionally does **not** cover the pre-existing `red.*`
//! trees (ai, server, storage, search, etc.) — those have their own
//! lifecycle in `impl_core`. Keys here live under the `ask.*`,
//! `ai.ner.*`, `cache.*`, `concurrency.*`, `durability.*`,
//! `runtime.*`, and `storage.*` namespaces.
18
19use crate::serde_json::Value as JsonValue;
20use crate::storage::UnifiedStore;
21
22#[inline]
23fn num(v: f64) -> JsonValue {
24    JsonValue::Number(v)
25}
26
27#[inline]
28fn text(s: &str) -> JsonValue {
29    JsonValue::String(s.to_string())
30}
31
32/// Default value encoded as JSON so the loader can delegate to
33/// `set_config_tree` which already handles every `Value` variant.
34#[derive(Debug, Clone)]
35pub struct ConfigDefault {
36    pub key: &'static str,
37    pub tier: Tier,
38    /// Lazily produced JSON default. A closure because `bgwriter.delay_ms`
39    /// etc. are unsigned and `serde_json::Value::from(u64)` is fine, but
40    /// we want the option of composing richer defaults later.
41    pub default: fn() -> JsonValue,
42}
43
/// Lifecycle class of a matrix key.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Tier {
    /// Tier A: written into `red_config` on boot when missing, so it is
    /// always visible in `SHOW CONFIG`.
    Critical,
    /// Tier B: default lives in memory only; shows up in `SHOW CONFIG`
    /// only once a user has written it.
    Optional,
}
51
52/// The full matrix. Keep sorted by namespace for readability.
53pub const MATRIX: &[ConfigDefault] = &[
54    // ask.*
55    ConfigDefault {
56        key: "ask.max_prompt_tokens",
57        tier: Tier::Optional,
58        default: || num(8192.0),
59    },
60    ConfigDefault {
61        key: "ask.max_completion_tokens",
62        tier: Tier::Optional,
63        default: || num(1024.0),
64    },
65    ConfigDefault {
66        key: "ask.max_sources_bytes",
67        tier: Tier::Optional,
68        default: || num(262_144.0),
69    },
70    ConfigDefault {
71        key: "ask.timeout_ms",
72        tier: Tier::Optional,
73        default: || num(30_000.0),
74    },
75    ConfigDefault {
76        key: "ask.daily_cost_cap_usd",
77        tier: Tier::Optional,
78        default: || text(""),
79    },
80    ConfigDefault {
81        key: "ask.audit.include_answer",
82        tier: Tier::Optional,
83        default: || JsonValue::Bool(false),
84    },
85    ConfigDefault {
86        key: "ask.audit.retention_days",
87        tier: Tier::Optional,
88        default: || num(90.0),
89    },
90    ConfigDefault {
91        key: "ask.cache.enabled",
92        tier: Tier::Optional,
93        default: || JsonValue::Bool(false),
94    },
95    ConfigDefault {
96        key: "ask.cache.default_ttl",
97        tier: Tier::Optional,
98        default: || text(""),
99    },
100    ConfigDefault {
101        key: "ask.cache.max_entries",
102        tier: Tier::Optional,
103        default: || num(1024.0),
104    },
105    // cache.blob.*
106    ConfigDefault {
107        key: "cache.blob.l1_bytes_max",
108        tier: Tier::Critical,
109        default: || num(crate::storage::cache::DEFAULT_BLOB_L1_BYTES_MAX as f64),
110    },
111    ConfigDefault {
112        key: "cache.blob.l2_bytes_max",
113        tier: Tier::Critical,
114        default: || num(crate::storage::cache::DEFAULT_BLOB_L2_BYTES_MAX as f64),
115    },
116    ConfigDefault {
117        key: "cache.blob.max_namespaces",
118        tier: Tier::Critical,
119        default: || num(crate::storage::cache::DEFAULT_BLOB_MAX_NAMESPACES as f64),
120    },
121    // durability.*
122    ConfigDefault {
123        key: "durability.mode",
124        tier: Tier::Critical,
125        default: || text("sync"),
126    },
127    // runtime.result_cache.*
128    ConfigDefault {
129        key: "runtime.result_cache.backend",
130        tier: Tier::Critical,
131        default: || text("legacy"),
132    },
133    // concurrency.*
134    ConfigDefault {
135        key: "concurrency.locking.enabled",
136        tier: Tier::Critical,
137        default: || JsonValue::Bool(true),
138    },
139    ConfigDefault {
140        key: "concurrency.locking.deadlock_timeout_ms",
141        tier: Tier::Optional,
142        default: || num(5000.0),
143    },
144    // storage.wal.*
145    ConfigDefault {
146        key: "storage.wal.max_interval_ms",
147        tier: Tier::Critical,
148        default: || num(10.0),
149    },
150    ConfigDefault {
151        key: "storage.wal.min_batch_size",
152        tier: Tier::Optional,
153        default: || num(4.0),
154    },
155    // storage.bgwriter.*
156    ConfigDefault {
157        key: "storage.bgwriter.delay_ms",
158        tier: Tier::Critical,
159        default: || num(200.0),
160    },
161    ConfigDefault {
162        key: "storage.bgwriter.max_pages_per_round",
163        tier: Tier::Optional,
164        default: || num(100.0),
165    },
166    ConfigDefault {
167        key: "storage.bgwriter.lru_multiplier",
168        tier: Tier::Optional,
169        default: || num(2.0),
170    },
171    // storage.bulk_insert.*
172    ConfigDefault {
173        key: "storage.bulk_insert.max_buffered_rows",
174        tier: Tier::Optional,
175        default: || num(1000.0),
176    },
177    ConfigDefault {
178        key: "storage.bulk_insert.max_buffered_bytes",
179        tier: Tier::Optional,
180        default: || num(65536.0),
181    },
182    // storage.hot_update.*
183    ConfigDefault {
184        key: "storage.hot_update.max_chain_hops",
185        tier: Tier::Optional,
186        default: || num(32.0),
187    },
188    // storage.btree.*
189    ConfigDefault {
190        key: "storage.btree.lehman_yao",
191        tier: Tier::Critical,
192        default: || JsonValue::Bool(true),
193    },
194    // ai.ner.* — opt-in LLM backend for AskPipeline Stage 1 (issue #189).
195    // Default backend stays heuristic so existing deployments keep
196    // their current behaviour without operator action.
197    ConfigDefault {
198        key: "ai.ner.backend",
199        tier: Tier::Optional,
200        default: || text("heuristic"),
201    },
202    ConfigDefault {
203        key: "ai.ner.endpoint",
204        tier: Tier::Optional,
205        default: || text(""),
206    },
207    ConfigDefault {
208        key: "ai.ner.model",
209        tier: Tier::Optional,
210        default: || text(""),
211    },
212    ConfigDefault {
213        key: "ai.ner.timeout_ms",
214        tier: Tier::Optional,
215        default: || num(5000.0),
216    },
217    ConfigDefault {
218        key: "ai.ner.fallback",
219        tier: Tier::Optional,
220        default: || text("use_heuristic"),
221    },
222    // runtime.ai.transport.* — shared outbound AI HTTP client foundation
223    // (issue #274). Provider rewiring can opt into these defaults
224    // incrementally.
225    ConfigDefault {
226        key: "runtime.ai.transport_pool_size",
227        tier: Tier::Optional,
228        default: || num(16.0),
229    },
230    ConfigDefault {
231        key: "runtime.ai.transport_timeout_ms",
232        tier: Tier::Optional,
233        default: || num(30000.0),
234    },
235    ConfigDefault {
236        key: "runtime.ai.transport_retry_max_attempts",
237        tier: Tier::Optional,
238        default: || num(3.0),
239    },
240    ConfigDefault {
241        key: "runtime.ai.transport_retry_base_ms",
242        tier: Tier::Optional,
243        default: || num(500.0),
244    },
245    // cache.blob.policy.* — extended TTL hot-path opt-in (issue #189).
246    ConfigDefault {
247        key: "cache.blob.policy.extended",
248        tier: Tier::Optional,
249        default: || text("off"),
250    },
251    // cache.blob.async_promotion — async L2->L1 promotion pool opt-in
252    // (issue #193). When "on", L2 hits return bytes to the caller
253    // immediately and the L1 install runs on a background worker.
254    // Default "off" for safe rollout — legacy synchronous promotion path.
255    ConfigDefault {
256        key: "cache.blob.async_promotion",
257        tier: Tier::Optional,
258        default: || text("off"),
259    },
260];
261
262/// Fetch the JSON default for a matrix key. Returns `None` when the
263/// key is not in the matrix (callers should treat that as a
264/// programming error — unknown key, unknown tier, unknown semantics).
265pub fn default_for(key: &str) -> Option<JsonValue> {
266    MATRIX
267        .iter()
268        .find(|entry| entry.key == key)
269        .map(|entry| (entry.default)())
270}
271
272/// Tier lookup — useful for tests and for introspection commands
273/// that want to report whether a key is expected to self-heal.
274pub fn tier_for(key: &str) -> Option<Tier> {
275    MATRIX
276        .iter()
277        .find(|entry| entry.key == key)
278        .map(|entry| entry.tier)
279}
280
281/// Boot-time self-healing pass: for every `Tier::Critical` key, if
282/// `red_config` does not already contain the key, write the default
283/// in. Idempotent — re-running produces no writes.
284///
285/// `Tier::Optional` keys are never touched here; they stay
286/// transparent-default until a user `SET CONFIG` elevates them.
287pub fn heal_critical_keys(store: &UnifiedStore) {
288    // `set_config_tree` dot-splits the key and stores one row per
289    // leaf, so we handle each matrix entry individually.
290    for entry in MATRIX {
291        if entry.tier != Tier::Critical {
292            continue;
293        }
294        if is_key_present(store, entry.key) {
295            continue;
296        }
297        store.set_config_tree(entry.key, &(entry.default)());
298    }
299}
300
301/// Lightweight presence probe. Avoids loading the whole red_config
302/// collection; scans until the first hit.
303fn is_key_present(store: &UnifiedStore, key: &str) -> bool {
304    let Some(manager) = store.get_collection("red_config") else {
305        return false;
306    };
307    let mut found = false;
308    manager.for_each_entity(|entity| {
309        if let Some(row) = entity.data.as_row() {
310            let entry_key = row.get_field("key").and_then(|v| match v {
311                crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
312                _ => None,
313            });
314            if entry_key == Some(key) {
315                found = true;
316                return false; // short-circuit
317            }
318        }
319        true
320    });
321    found
322}
323
#[cfg(test)]
mod tests {
    use super::*;

    /// No entry may produce a JSON null as its default.
    #[test]
    fn every_matrix_entry_has_a_default_that_resolves() {
        MATRIX.iter().for_each(|entry| {
            assert!(
                !matches!((entry.default)(), JsonValue::Null),
                "matrix key {} has a null default, defeats self-heal",
                entry.key
            );
        });
    }

    /// Tripwire: demoting any of these keys out of Tier A without
    /// updating callers should fail here.
    #[test]
    fn critical_keys_cover_the_core_guarantees() {
        let required_critical: &[&str] = &[
            "cache.blob.l1_bytes_max",
            "cache.blob.l2_bytes_max",
            "cache.blob.max_namespaces",
            "durability.mode",
            "runtime.result_cache.backend",
            "concurrency.locking.enabled",
            "storage.wal.max_interval_ms",
            "storage.bgwriter.delay_ms",
            "storage.btree.lehman_yao",
        ];
        required_critical.iter().copied().for_each(|key| {
            assert_eq!(
                tier_for(key),
                Some(Tier::Critical),
                "{key} must be a Tier A (Critical) key",
            );
        });
    }

    /// These keys must stay Tier B so the boot pass never writes them.
    #[test]
    fn optional_keys_are_not_self_healed() {
        let must_be_optional: &[&str] = &[
            "concurrency.locking.deadlock_timeout_ms",
            "storage.wal.min_batch_size",
            "storage.bgwriter.max_pages_per_round",
            "storage.bgwriter.lru_multiplier",
            "storage.bulk_insert.max_buffered_rows",
            "storage.bulk_insert.max_buffered_bytes",
            "storage.hot_update.max_chain_hops",
        ];
        must_be_optional.iter().copied().for_each(|key| {
            assert_eq!(tier_for(key), Some(Tier::Optional), "{key} tier mismatch");
        });
    }

    /// Keys outside the matrix have neither a default nor a tier.
    #[test]
    fn unknown_key_returns_none() {
        let missing = "nonexistent.key";
        assert!(default_for(missing).is_none());
        assert!(tier_for(missing).is_none());
    }
}