Skip to main content

reddb_server/runtime/ai/
ask_response_envelope.rs

1//! `AskResponseEnvelope` — pure serializer for the canonical
2//! non-streaming ASK JSON response (issue #406, PRD #391).
3//!
4//! Deep module: no I/O, no transport, no clock. Owns the on-wire JSON
5//! shape that the embedded stdio JSON-RPC `query` method returns, and
6//! that gRPC (#407), Postgres-wire (#408), and MCP non-stream (#409)
7//! all embed verbatim. Pinning the shape here means a future transport
8//! slice cannot accidentally drop `citations`, rename `cache_hit`, or
9//! re-shape `validation` without the tests in this file failing first.
10//!
11//! ## Why a separate module
12//!
13//! ASK has six new fields the legacy bucketed response did not carry
14//! (`answer`, `sources_flat`, `citations`, `validation`, `cache_hit`,
15//! `cost_usd`). The acceptance criteria for #406 require every one
16//! present in the JSON-RPC response. That's a "field-presence" bug
17//! surface — easy to forget one when the wiring slice lands and hard
18//! to notice in review. Building the JSON in a tested deep module
19//! keeps the wiring slice focused on "where do I write these bytes"
20//! and the contract here on "are the bytes right".
21//!
22//! ## Shape pinned by tests
23//!
24//! Top-level keys (alphabetised by the `BTreeMap`-backed encoder):
25//!
26//! - `answer` — full answer text with inline `[^N]` markers.
27//! - `cache_hit` — bool.
28//! - `citations` — `[{marker, urn}]`, sorted by marker ascending.
29//! - `completion_tokens` — number.
30//! - `cost_usd` — number.
31//! - `mode` — `"strict"` or `"lenient"`, the *effective* mode after
32//!   provider-capability fallback (#396) — mirrors the audit row #402.
33//! - `model` — string.
34//! - `prompt_tokens` — number.
35//! - `provider` — string.
36//! - `retry_count` — number (0 or 1 per #395's one-retry budget).
37//! - `sources_flat` — `[{payload, urn}]`, post-RRF (#398) order
38//!   preserved verbatim so the client can map `[^N]` → `sources_flat[N-1]`.
39//! - `validation` — `{errors, ok, warnings}`. Errors and warnings are
40//!   `[{detail, kind}]` to match the shapes audit_record_builder (#402)
41//!   and sse_frame_encoder (#405) already pin.
42//!
//! The determinism seed (#400) is deliberately *not* in the response.
//! It is recorded in the audit row rather than surfaced to the caller —
//! leaking the seed would let a hostile caller replay deterministic answers.
46
47use crate::serde_json::{Map, Value};
48
49/// One row from `sources_flat`. `urn` is the engine entity URN,
50/// `payload` is the column-policy-redacted JSON serialised as a
51/// string so the envelope JSON stays flat (the client re-parses if
52/// it wants structure — matches the SSE `sources` frame shape #405).
53#[derive(Debug, Clone, PartialEq)]
54pub struct SourceRow {
55    pub urn: String,
56    pub payload: String,
57}
58
59/// One citation: `[^N]` in the answer ↔ `sources_flat[N-1]`.
60#[derive(Debug, Clone, PartialEq)]
61pub struct Citation {
62    pub marker: u32,
63    pub urn: String,
64}
65
66/// One validation warning. Same shape as the SSE terminal frame so
67/// HTTP clients can share parsing code across streaming and non-
68/// streaming paths.
69#[derive(Debug, Clone, PartialEq)]
70pub struct ValidationWarning {
71    pub kind: String,
72    pub detail: String,
73}
74
75/// One validation error. Same shape as warnings; `kind` is one of
76/// `"malformed"` / `"out_of_range"` per #395's `ValidationErrorKind`.
77#[derive(Debug, Clone, PartialEq)]
78pub struct ValidationError {
79    pub kind: String,
80    pub detail: String,
81}
82
83/// Validation block. `ok = false` with non-empty `errors` corresponds
84/// to the HTTP 422 path on retry exhaustion (#395).
85#[derive(Debug, Clone, PartialEq)]
86pub struct Validation {
87    pub ok: bool,
88    pub warnings: Vec<ValidationWarning>,
89    pub errors: Vec<ValidationError>,
90}
91
92/// Effective mode actually applied — *after* provider-capability
93/// fallback (#396). The originally-requested mode is recorded in the
94/// audit row, not here.
95#[derive(Debug, Clone, Copy, PartialEq, Eq)]
96pub enum Mode {
97    Strict,
98    Lenient,
99}
100
101impl Mode {
102    fn as_str(self) -> &'static str {
103        match self {
104            Mode::Strict => "strict",
105            Mode::Lenient => "lenient",
106        }
107    }
108}
109
110/// Internal result a non-streaming ASK call produces — input to
111/// [`build`]. The wiring slice (deferred) constructs this from
112/// `execute_ask`'s outputs.
113#[derive(Debug, Clone)]
114pub struct AskResult {
115    pub answer: String,
116    pub sources_flat: Vec<SourceRow>,
117    pub citations: Vec<Citation>,
118    pub validation: Validation,
119    pub cache_hit: bool,
120    pub provider: String,
121    pub model: String,
122    pub prompt_tokens: u32,
123    pub completion_tokens: u32,
124    pub cost_usd: f64,
125    pub effective_mode: Mode,
126    pub retry_count: u32,
127}
128
129/// Serialise an [`AskResult`] to its canonical JSON envelope.
130///
131/// Output is a `Value::Object` ready to drop into a JSON-RPC `result`
132/// field, a gRPC message, or a Postgres-wire single-row result set.
133/// Re-running on byte-equal input is byte-equal output (pinned by
134/// `build_is_deterministic_across_calls`) — required by the ASK
135/// determinism contract (#400).
136pub fn build(result: &AskResult) -> Value {
137    let mut m = Map::new();
138    m.insert("answer".into(), Value::String(result.answer.clone()));
139    m.insert("cache_hit".into(), Value::Bool(result.cache_hit));
140    m.insert("citations".into(), citations_value(&result.citations));
141    m.insert(
142        "completion_tokens".into(),
143        Value::Number(result.completion_tokens as f64),
144    );
145    m.insert("cost_usd".into(), Value::Number(result.cost_usd));
146    m.insert("mode".into(), Value::String(result.effective_mode.as_str().into()));
147    m.insert("model".into(), Value::String(result.model.clone()));
148    m.insert(
149        "prompt_tokens".into(),
150        Value::Number(result.prompt_tokens as f64),
151    );
152    m.insert("provider".into(), Value::String(result.provider.clone()));
153    m.insert("retry_count".into(), Value::Number(result.retry_count as f64));
154    m.insert("sources_flat".into(), sources_value(&result.sources_flat));
155    m.insert("validation".into(), validation_value(&result.validation));
156    Value::Object(m)
157}
158
159fn citations_value(cites: &[Citation]) -> Value {
160    // Marker order is the contract — `[^1]` must come before `[^2]`
161    // in the array so the index aligns with the marker. Pinned by
162    // `citations_are_sorted_by_marker_ascending`.
163    let mut sorted: Vec<Citation> = cites.to_vec();
164    sorted.sort_by_key(|c| c.marker);
165    Value::Array(
166        sorted
167            .iter()
168            .map(|c| {
169                let mut o = Map::new();
170                o.insert("marker".into(), Value::Number(c.marker as f64));
171                o.insert("urn".into(), Value::String(c.urn.clone()));
172                Value::Object(o)
173            })
174            .collect(),
175    )
176}
177
178fn sources_value(rows: &[SourceRow]) -> Value {
179    Value::Array(
180        rows.iter()
181            .map(|r| {
182                let mut o = Map::new();
183                o.insert("payload".into(), Value::String(r.payload.clone()));
184                o.insert("urn".into(), Value::String(r.urn.clone()));
185                Value::Object(o)
186            })
187            .collect(),
188    )
189}
190
191fn warning_value(w: &ValidationWarning) -> Value {
192    let mut o = Map::new();
193    o.insert("detail".into(), Value::String(w.detail.clone()));
194    o.insert("kind".into(), Value::String(w.kind.clone()));
195    Value::Object(o)
196}
197
198fn error_value(e: &ValidationError) -> Value {
199    let mut o = Map::new();
200    o.insert("detail".into(), Value::String(e.detail.clone()));
201    o.insert("kind".into(), Value::String(e.kind.clone()));
202    Value::Object(o)
203}
204
205fn validation_value(v: &Validation) -> Value {
206    let mut o = Map::new();
207    o.insert(
208        "errors".into(),
209        Value::Array(v.errors.iter().map(error_value).collect()),
210    );
211    o.insert("ok".into(), Value::Bool(v.ok));
212    o.insert(
213        "warnings".into(),
214        Value::Array(v.warnings.iter().map(warning_value).collect()),
215    );
216    Value::Object(o)
217}
218
219#[cfg(test)]
220mod tests {
221    use super::*;
222
223    fn fixture() -> AskResult {
224        AskResult {
225            answer: "X is 42 [^1].".into(),
226            sources_flat: vec![SourceRow {
227                urn: "urn:reddb:row:1".into(),
228                payload: "{\"k\":\"v\"}".into(),
229            }],
230            citations: vec![Citation {
231                marker: 1,
232                urn: "urn:reddb:row:1".into(),
233            }],
234            validation: Validation {
235                ok: true,
236                warnings: vec![],
237                errors: vec![],
238            },
239            cache_hit: false,
240            provider: "openai".into(),
241            model: "gpt-4o-mini".into(),
242            prompt_tokens: 123,
243            completion_tokens: 45,
244            cost_usd: 0.000_321,
245            effective_mode: Mode::Strict,
246            retry_count: 0,
247        }
248    }
249
250    #[test]
251    fn build_emits_every_required_key() {
252        let v = build(&fixture());
253        let obj = v.as_object().unwrap();
254        let mut keys: Vec<&str> = obj.keys().map(|s| s.as_str()).collect();
255        keys.sort();
256        assert_eq!(
257            keys,
258            vec![
259                "answer",
260                "cache_hit",
261                "citations",
262                "completion_tokens",
263                "cost_usd",
264                "mode",
265                "model",
266                "prompt_tokens",
267                "provider",
268                "retry_count",
269                "sources_flat",
270                "validation",
271            ]
272        );
273    }
274
275    #[test]
276    fn answer_text_preserved_with_inline_markers() {
277        let v = build(&fixture());
278        assert_eq!(v.get("answer").and_then(|x| x.as_str()), Some("X is 42 [^1]."));
279    }
280
281    #[test]
282    fn cache_hit_serializes_as_bool() {
283        let mut r = fixture();
284        r.cache_hit = true;
285        let v = build(&r);
286        assert_eq!(v.get("cache_hit").and_then(|x| x.as_bool()), Some(true));
287    }
288
289    #[test]
290    fn citations_are_sorted_by_marker_ascending() {
291        let mut r = fixture();
292        r.citations = vec![
293            Citation {
294                marker: 3,
295                urn: "urn:c".into(),
296            },
297            Citation {
298                marker: 1,
299                urn: "urn:a".into(),
300            },
301            Citation {
302                marker: 2,
303                urn: "urn:b".into(),
304            },
305        ];
306        let v = build(&r);
307        let arr = v.get("citations").and_then(|x| x.as_array()).unwrap();
308        let markers: Vec<u64> = arr
309            .iter()
310            .map(|c| c.get("marker").and_then(|m| m.as_u64()).unwrap())
311            .collect();
312        assert_eq!(markers, vec![1, 2, 3]);
313    }
314
315    #[test]
316    fn sources_flat_preserves_input_order() {
317        // Post-RRF order is the contract — `[^N]` indexes into this
318        // array, so reordering would silently break grounding.
319        let mut r = fixture();
320        r.sources_flat = vec![
321            SourceRow {
322                urn: "urn:z".into(),
323                payload: "{}".into(),
324            },
325            SourceRow {
326                urn: "urn:a".into(),
327                payload: "{}".into(),
328            },
329        ];
330        let v = build(&r);
331        let arr = v.get("sources_flat").and_then(|x| x.as_array()).unwrap();
332        assert_eq!(
333            arr[0].get("urn").and_then(|x| x.as_str()),
334            Some("urn:z")
335        );
336        assert_eq!(
337            arr[1].get("urn").and_then(|x| x.as_str()),
338            Some("urn:a")
339        );
340    }
341
342    #[test]
343    fn sources_row_carries_payload_as_string() {
344        let v = build(&fixture());
345        let arr = v.get("sources_flat").and_then(|x| x.as_array()).unwrap();
346        assert_eq!(
347            arr[0].get("payload").and_then(|x| x.as_str()),
348            Some("{\"k\":\"v\"}")
349        );
350    }
351
352    #[test]
353    fn validation_ok_carries_empty_arrays() {
354        let v = build(&fixture());
355        let val = v.get("validation").unwrap();
356        assert_eq!(val.get("ok").and_then(|x| x.as_bool()), Some(true));
357        assert_eq!(val.get("warnings").and_then(|x| x.as_array()).unwrap().len(), 0);
358        assert_eq!(val.get("errors").and_then(|x| x.as_array()).unwrap().len(), 0);
359    }
360
361    #[test]
362    fn validation_carries_warnings_and_errors_with_kind_detail() {
363        let mut r = fixture();
364        r.validation = Validation {
365            ok: false,
366            warnings: vec![ValidationWarning {
367                kind: "mode_fallback".into(),
368                detail: "ollama".into(),
369            }],
370            errors: vec![ValidationError {
371                kind: "out_of_range".into(),
372                detail: "marker 7 > 3 sources".into(),
373            }],
374        };
375        let v = build(&r);
376        let val = v.get("validation").unwrap();
377        assert_eq!(val.get("ok").and_then(|x| x.as_bool()), Some(false));
378        let warns = val.get("warnings").and_then(|x| x.as_array()).unwrap();
379        assert_eq!(warns[0].get("kind").and_then(|x| x.as_str()), Some("mode_fallback"));
380        assert_eq!(warns[0].get("detail").and_then(|x| x.as_str()), Some("ollama"));
381        let errs = val.get("errors").and_then(|x| x.as_array()).unwrap();
382        assert_eq!(errs[0].get("kind").and_then(|x| x.as_str()), Some("out_of_range"));
383    }
384
385    #[test]
386    fn mode_serializes_as_strict_or_lenient() {
387        let mut r = fixture();
388        r.effective_mode = Mode::Strict;
389        assert_eq!(build(&r).get("mode").and_then(|x| x.as_str()), Some("strict"));
390        r.effective_mode = Mode::Lenient;
391        assert_eq!(build(&r).get("mode").and_then(|x| x.as_str()), Some("lenient"));
392    }
393
394    #[test]
395    fn usage_fields_flat_at_top_level() {
396        // Matches the audit row shape (#402) and SSE audit frame
397        // (#405). Nested `usage: {...}` would force every transport
398        // and SDK to re-shape.
399        let v = build(&fixture());
400        assert_eq!(v.get("prompt_tokens").and_then(|x| x.as_u64()), Some(123));
401        assert_eq!(v.get("completion_tokens").and_then(|x| x.as_u64()), Some(45));
402        assert!(v.get("cost_usd").is_some());
403    }
404
405    #[test]
406    fn cost_usd_keeps_fractional_precision() {
407        let mut r = fixture();
408        r.cost_usd = 0.000_321;
409        let v = build(&r);
410        assert_eq!(v.get("cost_usd").and_then(|x| x.as_f64()), Some(0.000_321));
411    }
412
413    #[test]
414    fn retry_count_zero_and_one_both_round_trip() {
415        // #395 caps retries at one — pinning both endpoints guards
416        // against an off-by-one if the budget ever changes.
417        let mut r = fixture();
418        r.retry_count = 0;
419        assert_eq!(
420            build(&r).get("retry_count").and_then(|x| x.as_u64()),
421            Some(0)
422        );
423        r.retry_count = 1;
424        assert_eq!(
425            build(&r).get("retry_count").and_then(|x| x.as_u64()),
426            Some(1)
427        );
428    }
429
430    #[test]
431    fn does_not_expose_seed_or_temperature() {
432        // Determinism inputs (#400) are recorded in the audit row,
433        // not surfaced to the caller. Leaking the seed would let a
434        // hostile caller replay deterministic answers.
435        let v = build(&fixture());
436        let obj = v.as_object().unwrap();
437        assert!(!obj.contains_key("seed"));
438        assert!(!obj.contains_key("temperature"));
439    }
440
441    #[test]
442    fn empty_sources_and_citations_are_arrays_not_null() {
443        // Empty arrays are well-formed (`STRICT OFF` on a refusal can
444        // produce no citations). Missing keys would break a downstream
445        // `.length` access.
446        let mut r = fixture();
447        r.sources_flat = vec![];
448        r.citations = vec![];
449        let v = build(&r);
450        assert!(v.get("sources_flat").and_then(|x| x.as_array()).unwrap().is_empty());
451        assert!(v.get("citations").and_then(|x| x.as_array()).unwrap().is_empty());
452    }
453
454    #[test]
455    fn answer_escaping_handled_by_compact_encoder() {
456        let mut r = fixture();
457        r.answer = "she said \"hi\"\nnewline".into();
458        let bytes = build(&r).to_string_compact();
459        assert!(bytes.contains(r#"\"hi\""#));
460        assert!(bytes.contains(r#"\n"#));
461    }
462
463    #[test]
464    fn build_is_deterministic_across_calls() {
465        let r = fixture();
466        let a = build(&r).to_string_compact();
467        let b = build(&r).to_string_compact();
468        assert_eq!(a, b);
469    }
470
471    #[test]
472    fn build_is_deterministic_across_clone_inputs() {
473        let r1 = fixture();
474        let r2 = r1.clone();
475        assert_eq!(build(&r1).to_string_compact(), build(&r2).to_string_compact());
476    }
477
478    #[test]
479    fn top_level_key_order_is_alphabetical() {
480        // Pinned because clients on weak parsers (e.g. some PG-wire
481        // bindings doing string slicing) have been known to depend on
482        // it. BTreeMap-backed encoder gives it for free.
483        let bytes = build(&fixture()).to_string_compact();
484        let answer_pos = bytes.find("\"answer\"").unwrap();
485        let cache_pos = bytes.find("\"cache_hit\"").unwrap();
486        let citations_pos = bytes.find("\"citations\"").unwrap();
487        let validation_pos = bytes.find("\"validation\"").unwrap();
488        assert!(answer_pos < cache_pos);
489        assert!(cache_pos < citations_pos);
490        assert!(citations_pos < validation_pos);
491    }
492
493    #[test]
494    fn citation_with_same_marker_is_stable_under_sort() {
495        // Defensive: if two citations share a marker (malformed input
496        // from the validator path), the sort must be stable so the
497        // input order is preserved. Pinned because a different sort
498        // strategy (unstable + tie on marker) would non-determinise
499        // the response and break #400.
500        let mut r = fixture();
501        r.citations = vec![
502            Citation {
503                marker: 1,
504                urn: "urn:first".into(),
505            },
506            Citation {
507                marker: 1,
508                urn: "urn:second".into(),
509            },
510        ];
511        let v = build(&r);
512        let arr = v.get("citations").and_then(|x| x.as_array()).unwrap();
513        assert_eq!(arr[0].get("urn").and_then(|x| x.as_str()), Some("urn:first"));
514        assert_eq!(arr[1].get("urn").and_then(|x| x.as_str()), Some("urn:second"));
515    }
516}