Skip to main content

reddb_server/runtime/
config_matrix.rs

1//! Performance / operational config matrix.
2//!
3//! Two tiers:
4//!
5//! - **Tier A (`Critical`)** — self-healing on boot. If the key is
6//!   missing from `red_config`, the loader writes the default in.
7//!   Operators always see these via `SHOW CONFIG` so they know what
8//!   guarantees and tuning they have.
9//! - **Tier B (`Optional`)** — in-memory default. Never self-populated.
10//!   Appears in `SHOW CONFIG` only after an explicit `SET CONFIG`.
11//!
12//! The matrix is the single source of truth for perf / durability /
13//! concurrency / storage keys introduced by the perf-parity push.
14//! It intentionally does **not** cover the pre-existing `red.*`
15//! trees (ai, server, storage, search, etc.) — those have their own
16//! lifecycle in `impl_core`. Keys here live under the new `cache.*`,
17//! `durability.*`, `concurrency.*`, `storage.*`, `runtime.*`, and `ai.*` namespaces.
18
19use crate::serde_json::Value as JsonValue;
20use crate::storage::UnifiedStore;
21
22#[inline]
23fn num(v: f64) -> JsonValue {
24    JsonValue::Number(v)
25}
26
27#[inline]
28fn text(s: &str) -> JsonValue {
29    JsonValue::String(s.to_string())
30}
31
32/// Default value encoded as JSON so the loader can delegate to
33/// `set_config_tree` which already handles every `Value` variant.
34#[derive(Debug, Clone)]
35pub struct ConfigDefault {
36    pub key: &'static str,
37    pub tier: Tier,
38    /// Lazily produced JSON default. A closure because `bgwriter.delay_ms`
39    /// etc. are unsigned and `serde_json::Value::from(u64)` is fine, but
40    /// we want the option of composing richer defaults later.
41    pub default: fn() -> JsonValue,
42}
43
/// Lifecycle class of a matrix key.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Tier {
    /// Tier A: written into `red_config` on boot when missing, so it is
    /// always visible in `SHOW CONFIG`.
    Critical,
    /// Tier B: default lives in memory only; shows up in `SHOW CONFIG`
    /// only once a user has written it.
    Optional,
}
51
52/// The full matrix. Keep sorted by namespace for readability.
53pub const MATRIX: &[ConfigDefault] = &[
54    // cache.blob.*
55    ConfigDefault {
56        key: "cache.blob.l1_bytes_max",
57        tier: Tier::Critical,
58        default: || num(crate::storage::cache::DEFAULT_BLOB_L1_BYTES_MAX as f64),
59    },
60    ConfigDefault {
61        key: "cache.blob.l2_bytes_max",
62        tier: Tier::Critical,
63        default: || num(crate::storage::cache::DEFAULT_BLOB_L2_BYTES_MAX as f64),
64    },
65    ConfigDefault {
66        key: "cache.blob.max_namespaces",
67        tier: Tier::Critical,
68        default: || num(crate::storage::cache::DEFAULT_BLOB_MAX_NAMESPACES as f64),
69    },
70    // durability.*
71    ConfigDefault {
72        key: "durability.mode",
73        tier: Tier::Critical,
74        default: || text("sync"),
75    },
76    // runtime.result_cache.*
77    ConfigDefault {
78        key: "runtime.result_cache.backend",
79        tier: Tier::Critical,
80        default: || text("legacy"),
81    },
82    // concurrency.*
83    ConfigDefault {
84        key: "concurrency.locking.enabled",
85        tier: Tier::Critical,
86        default: || JsonValue::Bool(true),
87    },
88    ConfigDefault {
89        key: "concurrency.locking.deadlock_timeout_ms",
90        tier: Tier::Optional,
91        default: || num(5000.0),
92    },
93    // storage.wal.*
94    ConfigDefault {
95        key: "storage.wal.max_interval_ms",
96        tier: Tier::Critical,
97        default: || num(10.0),
98    },
99    ConfigDefault {
100        key: "storage.wal.min_batch_size",
101        tier: Tier::Optional,
102        default: || num(4.0),
103    },
104    // storage.bgwriter.*
105    ConfigDefault {
106        key: "storage.bgwriter.delay_ms",
107        tier: Tier::Critical,
108        default: || num(200.0),
109    },
110    ConfigDefault {
111        key: "storage.bgwriter.max_pages_per_round",
112        tier: Tier::Optional,
113        default: || num(100.0),
114    },
115    ConfigDefault {
116        key: "storage.bgwriter.lru_multiplier",
117        tier: Tier::Optional,
118        default: || num(2.0),
119    },
120    // storage.bulk_insert.*
121    ConfigDefault {
122        key: "storage.bulk_insert.max_buffered_rows",
123        tier: Tier::Optional,
124        default: || num(1000.0),
125    },
126    ConfigDefault {
127        key: "storage.bulk_insert.max_buffered_bytes",
128        tier: Tier::Optional,
129        default: || num(65536.0),
130    },
131    // storage.hot_update.*
132    ConfigDefault {
133        key: "storage.hot_update.max_chain_hops",
134        tier: Tier::Optional,
135        default: || num(32.0),
136    },
137    // storage.btree.*
138    ConfigDefault {
139        key: "storage.btree.lehman_yao",
140        tier: Tier::Critical,
141        default: || JsonValue::Bool(true),
142    },
143    // ai.ner.* — opt-in LLM backend for AskPipeline Stage 1 (issue #189).
144    // Default backend stays heuristic so existing deployments keep
145    // their current behaviour without operator action.
146    ConfigDefault {
147        key: "ai.ner.backend",
148        tier: Tier::Optional,
149        default: || text("heuristic"),
150    },
151    ConfigDefault {
152        key: "ai.ner.endpoint",
153        tier: Tier::Optional,
154        default: || text(""),
155    },
156    ConfigDefault {
157        key: "ai.ner.model",
158        tier: Tier::Optional,
159        default: || text(""),
160    },
161    ConfigDefault {
162        key: "ai.ner.timeout_ms",
163        tier: Tier::Optional,
164        default: || num(5000.0),
165    },
166    ConfigDefault {
167        key: "ai.ner.fallback",
168        tier: Tier::Optional,
169        default: || text("use_heuristic"),
170    },
171    // runtime.ai.transport.* — shared outbound AI HTTP client foundation
172    // (issue #274). Provider rewiring can opt into these defaults
173    // incrementally.
174    ConfigDefault {
175        key: "runtime.ai.transport_pool_size",
176        tier: Tier::Optional,
177        default: || num(16.0),
178    },
179    ConfigDefault {
180        key: "runtime.ai.transport_timeout_ms",
181        tier: Tier::Optional,
182        default: || num(30000.0),
183    },
184    ConfigDefault {
185        key: "runtime.ai.transport_retry_max_attempts",
186        tier: Tier::Optional,
187        default: || num(3.0),
188    },
189    ConfigDefault {
190        key: "runtime.ai.transport_retry_base_ms",
191        tier: Tier::Optional,
192        default: || num(500.0),
193    },
194    // cache.blob.policy.* — extended TTL hot-path opt-in (issue #189).
195    ConfigDefault {
196        key: "cache.blob.policy.extended",
197        tier: Tier::Optional,
198        default: || text("off"),
199    },
200    // cache.blob.async_promotion — async L2->L1 promotion pool opt-in
201    // (issue #193). When "on", L2 hits return bytes to the caller
202    // immediately and the L1 install runs on a background worker.
203    // Default "off" for safe rollout — legacy synchronous promotion path.
204    ConfigDefault {
205        key: "cache.blob.async_promotion",
206        tier: Tier::Optional,
207        default: || text("off"),
208    },
209];
210
211/// Fetch the JSON default for a matrix key. Returns `None` when the
212/// key is not in the matrix (callers should treat that as a
213/// programming error — unknown key, unknown tier, unknown semantics).
214pub fn default_for(key: &str) -> Option<JsonValue> {
215    MATRIX
216        .iter()
217        .find(|entry| entry.key == key)
218        .map(|entry| (entry.default)())
219}
220
221/// Tier lookup — useful for tests and for introspection commands
222/// that want to report whether a key is expected to self-heal.
223pub fn tier_for(key: &str) -> Option<Tier> {
224    MATRIX
225        .iter()
226        .find(|entry| entry.key == key)
227        .map(|entry| entry.tier)
228}
229
230/// Boot-time self-healing pass: for every `Tier::Critical` key, if
231/// `red_config` does not already contain the key, write the default
232/// in. Idempotent — re-running produces no writes.
233///
234/// `Tier::Optional` keys are never touched here; they stay
235/// transparent-default until a user `SET CONFIG` elevates them.
236pub fn heal_critical_keys(store: &UnifiedStore) {
237    // `set_config_tree` dot-splits the key and stores one row per
238    // leaf, so we handle each matrix entry individually.
239    for entry in MATRIX {
240        if entry.tier != Tier::Critical {
241            continue;
242        }
243        if is_key_present(store, entry.key) {
244            continue;
245        }
246        store.set_config_tree(entry.key, &(entry.default)());
247    }
248}
249
250/// Lightweight presence probe. Avoids loading the whole red_config
251/// collection; scans until the first hit.
252fn is_key_present(store: &UnifiedStore, key: &str) -> bool {
253    let Some(manager) = store.get_collection("red_config") else {
254        return false;
255    };
256    let mut found = false;
257    manager.for_each_entity(|entity| {
258        if let Some(row) = entity.data.as_row() {
259            let entry_key = row.get_field("key").and_then(|v| match v {
260                crate::storage::schema::Value::Text(s) => Some(s.as_ref()),
261                _ => None,
262            });
263            if entry_key == Some(key) {
264                found = true;
265                return false; // short-circuit
266            }
267        }
268        true
269    });
270    found
271}
272
#[cfg(test)]
mod tests {
    use super::*;

    /// A null default would give the self-heal pass nothing useful to write.
    #[test]
    fn every_matrix_entry_has_a_default_that_resolves() {
        for entry in MATRIX.iter() {
            let is_null = matches!((entry.default)(), JsonValue::Null);
            assert!(
                !is_null,
                "matrix key {} has a null default, defeats self-heal",
                entry.key
            );
        }
    }

    /// Tripwire: dropping one of these keys from Tier A without
    /// updating callers makes this test fail.
    #[test]
    fn critical_keys_cover_the_core_guarantees() {
        const REQUIRED_CRITICAL: &[&str] = &[
            "cache.blob.l1_bytes_max",
            "cache.blob.l2_bytes_max",
            "cache.blob.max_namespaces",
            "durability.mode",
            "runtime.result_cache.backend",
            "concurrency.locking.enabled",
            "storage.wal.max_interval_ms",
            "storage.bgwriter.delay_ms",
            "storage.btree.lehman_yao",
        ];
        for &key in REQUIRED_CRITICAL {
            assert_eq!(
                tier_for(key),
                Some(Tier::Critical),
                "{key} must be a Tier A (Critical) key",
            );
        }
    }

    /// These keys must stay Tier B so the boot pass never writes them.
    #[test]
    fn optional_keys_are_not_self_healed() {
        const MUST_BE_OPTIONAL: &[&str] = &[
            "concurrency.locking.deadlock_timeout_ms",
            "storage.wal.min_batch_size",
            "storage.bgwriter.max_pages_per_round",
            "storage.bgwriter.lru_multiplier",
            "storage.bulk_insert.max_buffered_rows",
            "storage.bulk_insert.max_buffered_bytes",
            "storage.hot_update.max_chain_hops",
        ];
        for &key in MUST_BE_OPTIONAL {
            assert_eq!(tier_for(key), Some(Tier::Optional), "{key} tier mismatch");
        }
    }

    /// Keys outside the matrix resolve to neither a default nor a tier.
    #[test]
    fn unknown_key_returns_none() {
        let bogus = "nonexistent.key";
        assert!(default_for(bogus).is_none());
        assert!(tier_for(bogus).is_none());
    }
}