Skip to main content

reddb_server/runtime/ai/
explain_plan_builder.rs

1//! `ExplainPlanBuilder` — pure JSON plan synthesis for `EXPLAIN ASK '...'`.
2//!
3//! Issue #411 (PRD #391): operators want to see the retrieval plan,
4//! the source budget allocation, the provider/model the failover ladder
5//! would pick, and an estimated prompt-token cost — *without* paying for
6//! the LLM call. This module owns the shape of that plan output. It is
7//! a deep module by the same pattern as [`super::sse_frame_encoder`],
8//! [`super::audit_record_builder`], and friends:
9//!
10//! - No I/O, no clock, no LLM. The caller assembles inputs from the
11//!   real retrieval/determinism/provider layers and hands them in.
12//! - The output is a [`crate::serde_json::Value`] with a pinned key
13//!   set so the wiring slice (parser → `execute_ask` → response) and
14//!   downstream tests can rely on byte-stable JSON.
15//! - The `EXPLAIN` path is read-only: AC says no LLM call is made
16//!   and no audit row is written. Keeping this module side-effect free
17//!   is what makes that guarantee enforceable by inspection.
18//!
19//! ## Output shape
20//!
21//! Top-level object, keys alphabetised (BTreeMap-backed):
22//!
23//! ```json
24//! {
25//!   "depth": 2,
26//!   "determinism": { "seed": 12345, "temperature": 0.0 },
27//!   "estimated_cost": {
28//!       "max_completion_tokens": 1024,
29//!       "prompt_tokens": 1500
30//!   },
31//!   "fusion": { "algorithm": "rrf", "k_constant": 60, "limit": 20 },
32//!   "mode": "strict",
33//!   "provider": {
34//!       "model": "gpt-4o-mini",
35//!       "name": "openai",
36//!       "supports_citations": true,
37//!       "supports_seed": true
38//!   },
39//!   "question": "what changed last week?",
40//!   "retrieval": [
41//!       { "bucket": "bm25",   "min_score": 0.0, "top_k": 20 },
42//!       { "bucket": "vector", "min_score": 0.7, "top_k": 20 },
43//!       { "bucket": "graph",  "min_score": 0.0, "top_k": 20 }
44//!   ],
45//!   "sources": [
46//!       { "rank": 1, "rrf_score": 0.0327, "urn": "urn:reddb:row:42" },
47//!       { "rank": 2, "rrf_score": 0.0322, "urn": "urn:reddb:row:17" }
48//!   ]
49//! }
50//! ```
51//!
52//! `determinism.seed` and `determinism.temperature` are omitted when the
53//! provider does not support that knob — the audit-record convention
54//! from #402 (only record what the provider actually got). `sources` is
55//! whatever the retrieval+fusion stages would have produced; an empty
56//! list is well-formed (an honest "we'd retrieve nothing").
57//!
58//! ## Why a separate module
59//!
60//! The EXPLAIN output is part of the public surface — a debugging tool
61//! that operators script against — so the shape needs to be stable
62//! enough that adding a future field can't accidentally rename or shift
63//! an existing one. Centralising the build, with key-set and float-
64//! format tests, gives that stability cheaply.
65
66use crate::serde_json::{Map, Value};
67
/// Per-bucket retrieval settings, reported as one element of the plan's
/// `retrieval` array. The fields mirror what RRF (#398) consumes: a
/// per-ranker cap (`top_k`) and a per-bucket score floor (`min_score`)
/// that is applied before fusion.
#[derive(Debug, Clone)]
pub struct BucketPlan {
    /// Stable bucket identifier. The wiring layer passes `"bm25"`,
    /// `"vector"` or `"graph"`, and tests pin those names.
    pub bucket: String,
    /// Per-ranker cap for this bucket.
    pub top_k: u32,
    /// Score floor for this bucket, applied before fusion.
    pub min_score: f32,
}
79
/// A single entry of the projected `sources` list. Because the EXPLAIN
/// path never materialises payloads (there is no LLM call), a source is
/// described only by its URN and its fused RRF score.
#[derive(Debug, Clone)]
pub struct PlannedSource {
    /// URN identifying the source row.
    pub urn: String,
    /// Fused RRF score for this source.
    pub rrf_score: f64,
}
88
/// The provider and model the plan selects, together with the
/// capability flags that let a reader see at a glance whether `STRICT`
/// or `SEED` will actually take effect.
#[derive(Debug, Clone)]
pub struct ProviderSelection {
    /// Provider name, e.g. `"openai"`.
    pub name: String,
    /// Model identifier, e.g. `"gpt-4o-mini"`.
    pub model: String,
    /// Capability flag reported as `supports_citations` in the plan.
    pub supports_citations: bool,
    /// Capability flag reported as `supports_seed` in the plan.
    pub supports_seed: bool,
}
99
/// The mode that will actually run, i.e. the outcome of
/// `ProviderCapabilityRegistry::evaluate_mode` (#396) rather than the
/// mode that was requested. The audit row (#402) follows the same
/// convention.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Mode {
    Strict,
    Lenient,
}

impl Mode {
    /// Lowercase spelling emitted as the plan's `"mode"` value.
    fn as_wire(self) -> &'static str {
        match self {
            Self::Lenient => "lenient",
            Self::Strict => "strict",
        }
    }
}
117
/// The determinism knobs exactly as they will be sent to the provider.
/// A `None` field means the provider has no such knob (Anthropic has no
/// seed, Local has no temperature); the corresponding key is then left
/// out of the JSON.
#[derive(Debug, Clone, Copy, Default)]
pub struct Determinism {
    /// Sampling temperature, or `None` when the provider takes none.
    pub temperature: Option<f32>,
    /// Seed, or `None` when the provider takes none. `Some(0)` is a
    /// real seed, not a "no seed" marker.
    pub seed: Option<u64>,
}
126
/// Token-budget figures that the RRF and prompt-assembly stages can
/// produce without an LLM call.
#[derive(Debug, Clone, Copy)]
pub struct EstimatedCost {
    /// The assembler's best guess at the prompt size, in tokens.
    pub prompt_tokens: u32,
    /// The completion cap taken from settings (#401); a configured
    /// limit, not an estimate.
    pub max_completion_tokens: u32,
}
136
137/// All inputs the builder needs. Caller is responsible for assembling
138/// these from the real retrieval/determinism/provider layers; the
139/// builder does not call into them.
140#[derive(Debug, Clone)]
141pub struct Inputs<'a> {
142    pub question: &'a str,
143    pub mode: Mode,
144    pub retrieval: &'a [BucketPlan],
145    pub fusion_limit: u32,
146    pub fusion_k_constant: u32,
147    pub depth: u32,
148    pub sources: &'a [PlannedSource],
149    pub provider: &'a ProviderSelection,
150    pub determinism: Determinism,
151    pub estimated_cost: EstimatedCost,
152}
153
154fn obj(entries: Vec<(&str, Value)>) -> Value {
155    let mut map = Map::new();
156    for (k, v) in entries {
157        map.insert(k.to_string(), v);
158    }
159    Value::Object(map)
160}
161
162fn bucket_value(b: &BucketPlan) -> Value {
163    obj(vec![
164        ("bucket", Value::String(b.bucket.clone())),
165        ("min_score", Value::Number(b.min_score as f64)),
166        ("top_k", Value::Number(b.top_k as f64)),
167    ])
168}
169
170fn source_value(rank: usize, s: &PlannedSource) -> Value {
171    obj(vec![
172        ("rank", Value::Number(rank as f64)),
173        ("rrf_score", Value::Number(s.rrf_score)),
174        ("urn", Value::String(s.urn.clone())),
175    ])
176}
177
178fn provider_value(p: &ProviderSelection) -> Value {
179    obj(vec![
180        ("model", Value::String(p.model.clone())),
181        ("name", Value::String(p.name.clone())),
182        ("supports_citations", Value::Bool(p.supports_citations)),
183        ("supports_seed", Value::Bool(p.supports_seed)),
184    ])
185}
186
187fn determinism_value(d: Determinism) -> Value {
188    let mut entries: Vec<(&str, Value)> = Vec::new();
189    if let Some(seed) = d.seed {
190        entries.push(("seed", Value::Number(seed as f64)));
191    }
192    if let Some(t) = d.temperature {
193        entries.push(("temperature", Value::Number(t as f64)));
194    }
195    obj(entries)
196}
197
198fn cost_value(c: EstimatedCost) -> Value {
199    obj(vec![
200        ("max_completion_tokens", Value::Number(c.max_completion_tokens as f64)),
201        ("prompt_tokens", Value::Number(c.prompt_tokens as f64)),
202    ])
203}
204
205fn fusion_value(limit: u32, k: u32) -> Value {
206    obj(vec![
207        ("algorithm", Value::String("rrf".to_string())),
208        ("k_constant", Value::Number(k as f64)),
209        ("limit", Value::Number(limit as f64)),
210    ])
211}
212
213/// Build the plan JSON. Pure: same inputs → identical [`Value`] bytes.
214/// The wiring layer serializes with `value.to_string_compact()` and
215/// ships it as the response body.
216pub fn build(inputs: &Inputs<'_>) -> Value {
217    obj(vec![
218        ("depth", Value::Number(inputs.depth as f64)),
219        ("determinism", determinism_value(inputs.determinism)),
220        ("estimated_cost", cost_value(inputs.estimated_cost)),
221        (
222            "fusion",
223            fusion_value(inputs.fusion_limit, inputs.fusion_k_constant),
224        ),
225        ("mode", Value::String(inputs.mode.as_wire().to_string())),
226        ("provider", provider_value(inputs.provider)),
227        ("question", Value::String(inputs.question.to_string())),
228        (
229            "retrieval",
230            Value::Array(inputs.retrieval.iter().map(bucket_value).collect()),
231        ),
232        (
233            "sources",
234            Value::Array(
235                inputs
236                    .sources
237                    .iter()
238                    .enumerate()
239                    .map(|(i, s)| source_value(i + 1, s))
240                    .collect(),
241            ),
242        ),
243    ])
244}
245
#[cfg(test)]
mod tests {
    use super::*;

    /// Question text shared by every fixture.
    const QUESTION: &str = "what changed last week?";

    /// Shorthand constructor for a provider selection.
    fn provider(
        name: &str,
        model: &str,
        supports_citations: bool,
        supports_seed: bool,
    ) -> ProviderSelection {
        ProviderSelection {
            name: name.to_string(),
            model: model.to_string(),
            supports_citations,
            supports_seed,
        }
    }

    fn provider_openai() -> ProviderSelection {
        provider("openai", "gpt-4o-mini", true, true)
    }

    fn provider_anthropic() -> ProviderSelection {
        provider("anthropic", "claude-opus-4-7", true, false)
    }

    /// The three standard buckets, each with `top_k` 20.
    fn default_buckets() -> Vec<BucketPlan> {
        [("bm25", 0.0_f32), ("vector", 0.7), ("graph", 0.0)]
            .iter()
            .map(|&(name, min_score)| BucketPlan {
                bucket: name.to_string(),
                top_k: 20,
                min_score,
            })
            .collect()
    }

    /// Shorthand constructor for a planned source.
    fn source(urn: &str, rrf_score: f64) -> PlannedSource {
        PlannedSource {
            urn: urn.to_string(),
            rrf_score,
        }
    }

    /// Strict-mode inputs with fixed fusion, depth and cost values.
    fn fixture<'a>(
        provider: &'a ProviderSelection,
        retrieval: &'a [BucketPlan],
        sources: &'a [PlannedSource],
        determinism: Determinism,
    ) -> Inputs<'a> {
        let estimated_cost = EstimatedCost {
            prompt_tokens: 1500,
            max_completion_tokens: 1024,
        };
        Inputs {
            question: QUESTION,
            mode: Mode::Strict,
            retrieval,
            fusion_limit: 20,
            fusion_k_constant: 60,
            depth: 2,
            sources,
            provider,
            determinism,
            estimated_cost,
        }
    }

    /// Build a plan straight from fixture arguments.
    fn plan(
        provider: &ProviderSelection,
        retrieval: &[BucketPlan],
        sources: &[PlannedSource],
        determinism: Determinism,
    ) -> Value {
        build(&fixture(provider, retrieval, sources, determinism))
    }

    #[test]
    fn build_emits_pinned_top_level_keys() {
        let p = provider_openai();
        let b = default_buckets();
        let v = plan(&p, &b, &[], Determinism::default());
        let top = v.as_object().expect("top-level object");
        let keys: Vec<&str> = top.keys().map(|k| k.as_str()).collect();
        assert_eq!(
            keys,
            [
                "depth",
                "determinism",
                "estimated_cost",
                "fusion",
                "mode",
                "provider",
                "question",
                "retrieval",
                "sources",
            ]
        );
    }

    #[test]
    fn build_is_deterministic_across_calls() {
        let p = provider_openai();
        let b = default_buckets();
        let s = vec![source("urn:reddb:row:1", 0.0327)];
        let d = Determinism {
            temperature: Some(0.0),
            seed: Some(12345),
        };
        let first = plan(&p, &b, &s, d);
        let second = plan(&p, &b, &s, d);
        assert_eq!(first.to_string_compact(), second.to_string_compact());
    }

    #[test]
    fn mode_serializes_as_lowercase_words() {
        let p = provider_openai();
        let b = default_buckets();
        for (mode, wire) in [(Mode::Lenient, "lenient"), (Mode::Strict, "strict")] {
            let mut inp = fixture(&p, &b, &[], Determinism::default());
            inp.mode = mode;
            let v = build(&inp);
            assert_eq!(v.get("mode").and_then(|x| x.as_str()), Some(wire));
        }
    }

    #[test]
    fn determinism_omits_seed_when_provider_does_not_support_it() {
        // Anthropic-style provider: a temperature but no seed. Like the
        // audit row (#402), EXPLAIN reports only what the provider is
        // sent, so the missing key tells the operator that SEED has no
        // effect here.
        let p = provider_anthropic();
        let b = default_buckets();
        let d = Determinism {
            temperature: Some(0.0),
            seed: None,
        };
        let v = plan(&p, &b, &[], d);
        let det = v.get("determinism").and_then(|x| x.as_object()).unwrap();
        assert!(det.contains_key("temperature"));
        assert!(!det.contains_key("seed"));
    }

    #[test]
    fn determinism_omits_temperature_for_local_class_providers() {
        // Local endpoints accept no temperature (per #396); emitting
        // `temperature: 0.0` for them would misreport the call.
        let p = provider("local", "ggml", false, false);
        let b = default_buckets();
        let d = Determinism {
            temperature: None,
            seed: None,
        };
        let v = plan(&p, &b, &[], d);
        let det = v.get("determinism").and_then(|x| x.as_object()).unwrap();
        assert!(det.is_empty());
    }

    #[test]
    fn seed_zero_is_preserved_distinct_from_none() {
        // `Some(0)` is a genuine seed, not a stand-in for "no seed" —
        // the same guard `DeterminismDecider` and `AnswerCacheKey` pin.
        let p = provider_openai();
        let b = default_buckets();
        let d = Determinism {
            temperature: Some(0.0),
            seed: Some(0),
        };
        let v = plan(&p, &b, &[], d);
        let det = v.get("determinism").and_then(|x| x.as_object()).unwrap();
        assert!(det.contains_key("seed"));
        assert_eq!(det.get("seed").and_then(|x| x.as_u64()), Some(0));
    }

    #[test]
    fn retrieval_preserves_input_order_per_bucket() {
        // RRF is indifferent to bucket order, but a reader expects to
        // see bm25, vector, graph exactly as the wiring layer passed
        // them.
        let p = provider_openai();
        let b = default_buckets();
        let v = plan(&p, &b, &[], Determinism::default());
        let buckets = v.get("retrieval").and_then(|x| x.as_array()).unwrap();
        let names: Vec<&str> = buckets
            .iter()
            .map(|entry| entry.get("bucket").and_then(|x| x.as_str()).unwrap())
            .collect();
        assert_eq!(names, ["bm25", "vector", "graph"]);
    }

    #[test]
    fn retrieval_carries_per_bucket_min_score() {
        // A BM25 floor of 0.4 and a cosine floor of 0.7 live on
        // different scales, which is why RRF (#398) applies the floor
        // per bucket. EXPLAIN has to show that same per-bucket floor or
        // it would mislead anyone debugging `MIN_SCORE`.
        let p = provider_openai();
        let b = default_buckets();
        let v = plan(&p, &b, &[], Determinism::default());
        let buckets = v.get("retrieval").and_then(|x| x.as_array()).unwrap();

        let vector = &buckets[1];
        let v_score = vector.get("min_score").and_then(|x| x.as_f64()).unwrap();
        // The f32 closest to 0.7 is not the f64 closest to 0.7, so the
        // widened value differs from the literal in its low digits.
        // Compare with an epsilon instead of pinning the bit pattern.
        assert!((v_score - 0.7).abs() < 1e-6, "got {v_score}");

        let bm25 = &buckets[0];
        assert_eq!(bm25.get("min_score").and_then(|x| x.as_f64()), Some(0.0));
    }

    #[test]
    fn sources_emit_one_indexed_rank() {
        let p = provider_openai();
        let b = default_buckets();
        let s = vec![
            source("urn:a", 0.05),
            source("urn:b", 0.04),
            source("urn:c", 0.03),
        ];
        let v = plan(&p, &b, &s, Determinism::default());
        let arr = v.get("sources").and_then(|x| x.as_array()).unwrap();
        let ranks: Vec<u64> = arr
            .iter()
            .map(|entry| entry.get("rank").and_then(|x| x.as_u64()).unwrap())
            .collect();
        assert_eq!(ranks, [1, 2, 3]);
    }

    #[test]
    fn sources_preserve_input_order() {
        // Sources arrive already in post-RRF rank order and EXPLAIN
        // must not re-sort them; the wiring slice relies on
        // `inputs.sources[0]` being rank 1.
        let p = provider_openai();
        let b = default_buckets();
        let s = vec![source("urn:z", 0.05), source("urn:a", 0.04)];
        let v = plan(&p, &b, &s, Determinism::default());
        let arr = v.get("sources").and_then(|x| x.as_array()).unwrap();
        let urns: Vec<&str> = arr
            .iter()
            .map(|entry| entry.get("urn").and_then(|x| x.as_str()).unwrap())
            .collect();
        assert_eq!(urns, ["urn:z", "urn:a"]);
    }

    #[test]
    fn empty_sources_is_well_formed() {
        // Nothing retrieved means an empty array, never a missing key.
        let p = provider_openai();
        let b = default_buckets();
        let v = plan(&p, &b, &[], Determinism::default());
        let arr = v.get("sources").and_then(|x| x.as_array()).unwrap();
        assert!(arr.is_empty());
    }

    #[test]
    fn empty_retrieval_is_well_formed() {
        // Some future path (e.g. a text-only variant) might pass zero
        // buckets; that must still yield valid JSON.
        let p = provider_openai();
        let v = plan(&p, &[], &[], Determinism::default());
        let arr = v.get("retrieval").and_then(|x| x.as_array()).unwrap();
        assert!(arr.is_empty());
    }

    #[test]
    fn fusion_section_pins_rrf_and_k_constant() {
        // k=60 is the Cormack 2009 baseline that #398 already pins;
        // EXPLAIN exposes it so operators can confirm what is in use.
        let p = provider_openai();
        let b = default_buckets();
        let v = plan(&p, &b, &[], Determinism::default());
        let fusion = v.get("fusion").and_then(|x| x.as_object()).unwrap();
        assert_eq!(
            fusion.get("algorithm").and_then(|x| x.as_str()),
            Some("rrf")
        );
        assert_eq!(fusion.get("k_constant").and_then(|x| x.as_u64()), Some(60));
        assert_eq!(fusion.get("limit").and_then(|x| x.as_u64()), Some(20));
    }

    #[test]
    fn provider_section_carries_capability_flags() {
        let p = provider_anthropic();
        let b = default_buckets();
        let v = plan(&p, &b, &[], Determinism::default());
        let prov = v.get("provider").and_then(|x| x.as_object()).unwrap();
        assert_eq!(prov.get("name").and_then(|x| x.as_str()), Some("anthropic"));
        assert_eq!(
            prov.get("supports_citations").and_then(|x| x.as_bool()),
            Some(true)
        );
        assert_eq!(
            prov.get("supports_seed").and_then(|x| x.as_bool()),
            Some(false)
        );
    }

    #[test]
    fn estimated_cost_pins_keys_and_values() {
        let p = provider_openai();
        let b = default_buckets();
        let v = plan(&p, &b, &[], Determinism::default());
        let cost = v.get("estimated_cost").and_then(|x| x.as_object()).unwrap();
        let keys: Vec<&str> = cost.keys().map(|k| k.as_str()).collect();
        assert_eq!(keys, ["max_completion_tokens", "prompt_tokens"]);
        assert_eq!(
            cost.get("prompt_tokens").and_then(|x| x.as_u64()),
            Some(1500)
        );
        assert_eq!(
            cost.get("max_completion_tokens").and_then(|x| x.as_u64()),
            Some(1024)
        );
    }

    #[test]
    fn question_is_passed_through_verbatim() {
        // The question is neither truncated nor normalised: operators
        // paste it from EXPLAIN output into the next ASK call, so byte
        // equality is the contract.
        let p = provider_openai();
        let b = default_buckets();
        let q = "weird \"quotes\" + newlines\nstill ok?";
        let mut inp = fixture(&p, &b, &[], Determinism::default());
        inp.question = q;
        let v = build(&inp);
        assert_eq!(v.get("question").and_then(|x| x.as_str()), Some(q));
    }

    #[test]
    fn depth_is_pass_through_u32() {
        let p = provider_openai();
        let b = default_buckets();
        let mut inp = fixture(&p, &b, &[], Determinism::default());
        inp.depth = 5;
        let v = build(&inp);
        assert_eq!(v.get("depth").and_then(|x| x.as_u64()), Some(5));
    }
}