Skip to main content

reddb_server/runtime/ai/
ask_response_envelope.rs

1//! `AskResponseEnvelope` — pure serializer for the canonical
2//! non-streaming ASK JSON response (issue #406, PRD #391).
3//!
4//! Deep module: no I/O, no transport, no clock. Owns the on-wire JSON
5//! shape that the embedded stdio JSON-RPC `query` method returns, and
6//! that gRPC (#407), Postgres-wire (#408), and MCP non-stream (#409)
7//! all embed verbatim. Pinning the shape here means a future transport
8//! slice cannot accidentally drop `citations`, rename `cache_hit`, or
9//! re-shape `validation` without the tests in this file failing first.
10//!
11//! ## Why a separate module
12//!
13//! ASK has six new fields the legacy bucketed response did not carry
14//! (`answer`, `sources_flat`, `citations`, `validation`, `cache_hit`,
15//! `cost_usd`). The acceptance criteria for #406 require every one
16//! present in the JSON-RPC response. That's a "field-presence" bug
17//! surface — easy to forget one when the wiring slice lands and hard
18//! to notice in review. Building the JSON in a tested deep module
19//! keeps the wiring slice focused on "where do I write these bytes"
20//! and the contract here on "are the bytes right".
21//!
22//! ## Shape pinned by tests
23//!
24//! Top-level keys (alphabetised by the `BTreeMap`-backed encoder):
25//!
26//! - `answer` — full answer text with inline `[^N]` markers.
27//! - `cache_hit` — bool.
28//! - `citations` — `[{marker, urn}]`, sorted by marker ascending.
29//! - `completion_tokens` — number.
30//! - `cost_usd` — number.
31//! - `mode` — `"strict"` or `"lenient"`, the *effective* mode after
32//!   provider-capability fallback (#396) — mirrors the audit row #402.
33//! - `model` — string.
34//! - `prompt_tokens` — number.
35//! - `provider` — string.
36//! - `retry_count` — number (0 or 1 per #395's one-retry budget).
37//! - `sources_flat` — `[{payload, urn}]`, post-RRF (#398) order
38//!   preserved verbatim so the client can map `[^N]` → `sources_flat[N-1]`.
39//! - `validation` — `{errors, ok, warnings}`. Errors and warnings are
40//!   `[{detail, kind}]` to match the shapes audit_record_builder (#402)
41//!   and sse_frame_encoder (#405) already pin.
42//!
43//! The determinism seed (#400) is *not* in the response. It's recorded
44//! in the audit row, not surfaced to the caller — leaking the seed
45//! would let a hostile caller replay deterministic answers.
46
47use crate::serde_json::{Map, Value};
48
/// A single entry of the `sources_flat` array.
///
/// The payload travels as an already-serialised JSON *string* rather than
/// a nested object, which keeps the envelope flat; a client that wants
/// structure re-parses it. This matches the SSE `sources` frame shape
/// (#405).
#[derive(Clone, Debug, PartialEq)]
pub struct SourceRow {
    /// Engine entity URN identifying the source.
    pub urn: String,
    /// Column-policy-redacted JSON, serialised as a string.
    pub payload: String,
}
58
/// Links an inline `[^N]` marker in the answer text to the source it
/// cites, i.e. `sources_flat[N-1]`.
#[derive(Clone, Debug, PartialEq)]
pub struct Citation {
    /// The `N` of the `[^N]` marker.
    pub marker: u32,
    /// URN of the cited source.
    pub urn: String,
}
65
/// A non-fatal validation finding.
///
/// Deliberately the same `{kind, detail}` shape as the SSE terminal
/// frame, so HTTP clients can reuse one parser for the streaming and
/// non-streaming paths.
#[derive(Clone, Debug, PartialEq)]
pub struct ValidationWarning {
    /// Category of the warning.
    pub kind: String,
    /// Free-form detail accompanying `kind`.
    pub detail: String,
}
74
/// A fatal validation finding, shaped exactly like
/// [`ValidationWarning`].
///
/// `kind` is one of `"malformed"` / `"out_of_range"`, following #395's
/// `ValidationErrorKind`.
#[derive(Clone, Debug, PartialEq)]
pub struct ValidationError {
    /// Category of the error (see the type-level docs for the values).
    pub kind: String,
    /// Free-form detail accompanying `kind`.
    pub detail: String,
}
82
83/// Validation block. `ok = false` with non-empty `errors` corresponds
84/// to the HTTP 422 path on retry exhaustion (#395).
85#[derive(Debug, Clone, PartialEq)]
86pub struct Validation {
87    pub ok: bool,
88    pub warnings: Vec<ValidationWarning>,
89    pub errors: Vec<ValidationError>,
90}
91
/// The mode that was actually applied, i.e. the result *after*
/// provider-capability fallback (#396).
///
/// The mode the caller originally asked for is recorded in the audit
/// row and is not part of this type.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Mode {
    Strict,
    Lenient,
}

impl Mode {
    /// Wire spelling of the mode: `"strict"` or `"lenient"`.
    fn as_str(self) -> &'static str {
        // Exhaustive on purpose: adding a variant must force a decision
        // about its wire spelling.
        match self {
            Self::Strict => "strict",
            Self::Lenient => "lenient",
        }
    }
}
109
110/// Internal result a non-streaming ASK call produces — input to
111/// [`build`]. The wiring slice (deferred) constructs this from
112/// `execute_ask`'s outputs.
113#[derive(Debug, Clone)]
114pub struct AskResult {
115    pub answer: String,
116    pub sources_flat: Vec<SourceRow>,
117    pub citations: Vec<Citation>,
118    pub validation: Validation,
119    pub cache_hit: bool,
120    pub provider: String,
121    pub model: String,
122    pub prompt_tokens: u32,
123    pub completion_tokens: u32,
124    pub cost_usd: f64,
125    pub effective_mode: Mode,
126    pub retry_count: u32,
127}
128
129/// Serialise an [`AskResult`] to its canonical JSON envelope.
130///
131/// Output is a `Value::Object` ready to drop into a JSON-RPC `result`
132/// field, a gRPC message, or a Postgres-wire single-row result set.
133/// Re-running on byte-equal input is byte-equal output (pinned by
134/// `build_is_deterministic_across_calls`) — required by the ASK
135/// determinism contract (#400).
136pub fn build(result: &AskResult) -> Value {
137    let mut m = Map::new();
138    m.insert("answer".into(), Value::String(result.answer.clone()));
139    m.insert("cache_hit".into(), Value::Bool(result.cache_hit));
140    m.insert("citations".into(), citations_value(&result.citations));
141    m.insert(
142        "completion_tokens".into(),
143        Value::Number(result.completion_tokens as f64),
144    );
145    m.insert("cost_usd".into(), Value::Number(result.cost_usd));
146    m.insert(
147        "mode".into(),
148        Value::String(result.effective_mode.as_str().into()),
149    );
150    m.insert("model".into(), Value::String(result.model.clone()));
151    m.insert(
152        "prompt_tokens".into(),
153        Value::Number(result.prompt_tokens as f64),
154    );
155    m.insert("provider".into(), Value::String(result.provider.clone()));
156    m.insert(
157        "retry_count".into(),
158        Value::Number(result.retry_count as f64),
159    );
160    m.insert("sources_flat".into(), sources_value(&result.sources_flat));
161    m.insert("validation".into(), validation_value(&result.validation));
162    Value::Object(m)
163}
164
165fn citations_value(cites: &[Citation]) -> Value {
166    // Marker order is the contract — `[^1]` must come before `[^2]`
167    // in the array so the index aligns with the marker. Pinned by
168    // `citations_are_sorted_by_marker_ascending`.
169    let mut sorted: Vec<Citation> = cites.to_vec();
170    sorted.sort_by_key(|c| c.marker);
171    Value::Array(
172        sorted
173            .iter()
174            .map(|c| {
175                let mut o = Map::new();
176                o.insert("marker".into(), Value::Number(c.marker as f64));
177                o.insert("urn".into(), Value::String(c.urn.clone()));
178                Value::Object(o)
179            })
180            .collect(),
181    )
182}
183
184fn sources_value(rows: &[SourceRow]) -> Value {
185    Value::Array(
186        rows.iter()
187            .map(|r| {
188                let mut o = Map::new();
189                o.insert("payload".into(), Value::String(r.payload.clone()));
190                o.insert("urn".into(), Value::String(r.urn.clone()));
191                Value::Object(o)
192            })
193            .collect(),
194    )
195}
196
197fn warning_value(w: &ValidationWarning) -> Value {
198    let mut o = Map::new();
199    o.insert("detail".into(), Value::String(w.detail.clone()));
200    o.insert("kind".into(), Value::String(w.kind.clone()));
201    Value::Object(o)
202}
203
204fn error_value(e: &ValidationError) -> Value {
205    let mut o = Map::new();
206    o.insert("detail".into(), Value::String(e.detail.clone()));
207    o.insert("kind".into(), Value::String(e.kind.clone()));
208    Value::Object(o)
209}
210
211fn validation_value(v: &Validation) -> Value {
212    let mut o = Map::new();
213    o.insert(
214        "errors".into(),
215        Value::Array(v.errors.iter().map(error_value).collect()),
216    );
217    o.insert("ok".into(), Value::Bool(v.ok));
218    o.insert(
219        "warnings".into(),
220        Value::Array(v.warnings.iter().map(warning_value).collect()),
221    );
222    Value::Object(o)
223}
224
#[cfg(test)]
mod tests {
    // Contract tests pinning the on-wire shape produced by `build`.
    use super::*;

    /// Every top-level key of the envelope, in the alphabetical order the
    /// compact encoding is expected to emit them.
    const TOP_LEVEL_KEYS: [&str; 12] = [
        "answer",
        "cache_hit",
        "citations",
        "completion_tokens",
        "cost_usd",
        "mode",
        "model",
        "prompt_tokens",
        "provider",
        "retry_count",
        "sources_flat",
        "validation",
    ];

    /// Baseline result: one source, one citation, clean validation.
    /// Individual tests mutate a copy to exercise one field at a time.
    fn fixture() -> AskResult {
        AskResult {
            answer: "X is 42 [^1].".into(),
            sources_flat: vec![SourceRow {
                urn: "urn:reddb:row:1".into(),
                payload: "{\"k\":\"v\"}".into(),
            }],
            citations: vec![Citation {
                marker: 1,
                urn: "urn:reddb:row:1".into(),
            }],
            validation: Validation {
                ok: true,
                warnings: vec![],
                errors: vec![],
            },
            cache_hit: false,
            provider: "openai".into(),
            model: "gpt-4o-mini".into(),
            prompt_tokens: 123,
            completion_tokens: 45,
            cost_usd: 0.000_321,
            effective_mode: Mode::Strict,
            retry_count: 0,
        }
    }

    #[test]
    fn build_emits_every_required_key() {
        let v = build(&fixture());
        let obj = v.as_object().unwrap();
        let mut keys: Vec<&str> = obj.keys().map(|s| s.as_str()).collect();
        keys.sort();
        assert_eq!(keys, TOP_LEVEL_KEYS.to_vec());
    }

    #[test]
    fn answer_text_preserved_with_inline_markers() {
        let v = build(&fixture());
        assert_eq!(
            v.get("answer").and_then(|x| x.as_str()),
            Some("X is 42 [^1].")
        );
    }

    #[test]
    fn cache_hit_serializes_as_bool() {
        let mut r = fixture();
        r.cache_hit = true;
        let v = build(&r);
        assert_eq!(v.get("cache_hit").and_then(|x| x.as_bool()), Some(true));
    }

    #[test]
    fn citations_are_sorted_by_marker_ascending() {
        let mut r = fixture();
        r.citations = vec![
            Citation {
                marker: 3,
                urn: "urn:c".into(),
            },
            Citation {
                marker: 1,
                urn: "urn:a".into(),
            },
            Citation {
                marker: 2,
                urn: "urn:b".into(),
            },
        ];
        let v = build(&r);
        let arr = v.get("citations").and_then(|x| x.as_array()).unwrap();
        let markers: Vec<u64> = arr
            .iter()
            .map(|c| c.get("marker").and_then(|m| m.as_u64()).unwrap())
            .collect();
        assert_eq!(markers, vec![1, 2, 3]);
    }

    #[test]
    fn sources_flat_preserves_input_order() {
        // Post-RRF order is the contract — `[^N]` indexes into this
        // array, so reordering would silently break grounding.
        let mut r = fixture();
        r.sources_flat = vec![
            SourceRow {
                urn: "urn:z".into(),
                payload: "{}".into(),
            },
            SourceRow {
                urn: "urn:a".into(),
                payload: "{}".into(),
            },
        ];
        let v = build(&r);
        let arr = v.get("sources_flat").and_then(|x| x.as_array()).unwrap();
        assert_eq!(arr[0].get("urn").and_then(|x| x.as_str()), Some("urn:z"));
        assert_eq!(arr[1].get("urn").and_then(|x| x.as_str()), Some("urn:a"));
    }

    #[test]
    fn sources_row_carries_payload_as_string() {
        let v = build(&fixture());
        let arr = v.get("sources_flat").and_then(|x| x.as_array()).unwrap();
        assert_eq!(
            arr[0].get("payload").and_then(|x| x.as_str()),
            Some("{\"k\":\"v\"}")
        );
    }

    #[test]
    fn validation_ok_carries_empty_arrays() {
        let v = build(&fixture());
        let val = v.get("validation").unwrap();
        assert_eq!(val.get("ok").and_then(|x| x.as_bool()), Some(true));
        assert_eq!(
            val.get("warnings")
                .and_then(|x| x.as_array())
                .unwrap()
                .len(),
            0
        );
        assert_eq!(
            val.get("errors").and_then(|x| x.as_array()).unwrap().len(),
            0
        );
    }

    #[test]
    fn validation_carries_warnings_and_errors_with_kind_detail() {
        let mut r = fixture();
        r.validation = Validation {
            ok: false,
            warnings: vec![ValidationWarning {
                kind: "mode_fallback".into(),
                detail: "ollama".into(),
            }],
            errors: vec![ValidationError {
                kind: "out_of_range".into(),
                detail: "marker 7 > 3 sources".into(),
            }],
        };
        let v = build(&r);
        let val = v.get("validation").unwrap();
        assert_eq!(val.get("ok").and_then(|x| x.as_bool()), Some(false));
        let warns = val.get("warnings").and_then(|x| x.as_array()).unwrap();
        assert_eq!(
            warns[0].get("kind").and_then(|x| x.as_str()),
            Some("mode_fallback")
        );
        assert_eq!(
            warns[0].get("detail").and_then(|x| x.as_str()),
            Some("ollama")
        );
        let errs = val.get("errors").and_then(|x| x.as_array()).unwrap();
        assert_eq!(
            errs[0].get("kind").and_then(|x| x.as_str()),
            Some("out_of_range")
        );
        // The error's `detail` is part of the pinned shape too; without
        // this assertion a dropped or renamed `detail` on errors would
        // go unnoticed.
        assert_eq!(
            errs[0].get("detail").and_then(|x| x.as_str()),
            Some("marker 7 > 3 sources")
        );
    }

    #[test]
    fn mode_serializes_as_strict_or_lenient() {
        let mut r = fixture();
        r.effective_mode = Mode::Strict;
        assert_eq!(
            build(&r).get("mode").and_then(|x| x.as_str()),
            Some("strict")
        );
        r.effective_mode = Mode::Lenient;
        assert_eq!(
            build(&r).get("mode").and_then(|x| x.as_str()),
            Some("lenient")
        );
    }

    #[test]
    fn usage_fields_flat_at_top_level() {
        // Matches the audit row shape (#402) and SSE audit frame
        // (#405). Nested `usage: {...}` would force every transport
        // and SDK to re-shape.
        let v = build(&fixture());
        assert_eq!(v.get("prompt_tokens").and_then(|x| x.as_u64()), Some(123));
        assert_eq!(
            v.get("completion_tokens").and_then(|x| x.as_u64()),
            Some(45)
        );
        assert!(v.get("cost_usd").is_some());
    }

    #[test]
    fn cost_usd_keeps_fractional_precision() {
        let mut r = fixture();
        r.cost_usd = 0.000_321;
        let v = build(&r);
        assert_eq!(v.get("cost_usd").and_then(|x| x.as_f64()), Some(0.000_321));
    }

    #[test]
    fn retry_count_zero_and_one_both_round_trip() {
        // #395 caps retries at one — pinning both endpoints guards
        // against an off-by-one if the budget ever changes.
        let mut r = fixture();
        r.retry_count = 0;
        assert_eq!(
            build(&r).get("retry_count").and_then(|x| x.as_u64()),
            Some(0)
        );
        r.retry_count = 1;
        assert_eq!(
            build(&r).get("retry_count").and_then(|x| x.as_u64()),
            Some(1)
        );
    }

    #[test]
    fn does_not_expose_seed_or_temperature() {
        // Determinism inputs (#400) are recorded in the audit row,
        // not surfaced to the caller. Leaking the seed would let a
        // hostile caller replay deterministic answers.
        let v = build(&fixture());
        let obj = v.as_object().unwrap();
        assert!(!obj.contains_key("seed"));
        assert!(!obj.contains_key("temperature"));
    }

    #[test]
    fn empty_sources_and_citations_are_arrays_not_null() {
        // Empty arrays are well-formed (`STRICT OFF` on a refusal can
        // produce no citations). Missing keys would break a downstream
        // `.length` access.
        let mut r = fixture();
        r.sources_flat = vec![];
        r.citations = vec![];
        let v = build(&r);
        assert!(v
            .get("sources_flat")
            .and_then(|x| x.as_array())
            .unwrap()
            .is_empty());
        assert!(v
            .get("citations")
            .and_then(|x| x.as_array())
            .unwrap()
            .is_empty());
    }

    #[test]
    fn answer_escaping_handled_by_compact_encoder() {
        let mut r = fixture();
        r.answer = "she said \"hi\"\nnewline".into();
        let bytes = build(&r).to_string_compact();
        assert!(bytes.contains(r#"\"hi\""#));
        assert!(bytes.contains(r#"\n"#));
    }

    #[test]
    fn build_is_deterministic_across_calls() {
        let r = fixture();
        let a = build(&r).to_string_compact();
        let b = build(&r).to_string_compact();
        assert_eq!(a, b);
    }

    #[test]
    fn build_is_deterministic_across_clone_inputs() {
        let r1 = fixture();
        let r2 = r1.clone();
        assert_eq!(
            build(&r1).to_string_compact(),
            build(&r2).to_string_compact()
        );
    }

    #[test]
    fn top_level_key_order_is_alphabetical() {
        // Pinned because clients on weak parsers (e.g. some PG-wire
        // bindings doing string slicing) have been known to depend on
        // it. BTreeMap-backed encoder gives it for free.
        //
        // Every adjacent pair of keys is checked, not just a sample, so
        // a misordering anywhere in the envelope fails this test. With
        // the fixture's values, the first occurrence of each quoted name
        // in the compact encoding is the top-level key itself.
        let bytes = build(&fixture()).to_string_compact();
        let positions: Vec<usize> = TOP_LEVEL_KEYS
            .iter()
            .map(|k| {
                let quoted = format!("\"{}\"", k);
                bytes
                    .find(quoted.as_str())
                    .unwrap_or_else(|| panic!("key `{}` missing from envelope", k))
            })
            .collect();
        for (pos, names) in positions.windows(2).zip(TOP_LEVEL_KEYS.windows(2)) {
            assert!(
                pos[0] < pos[1],
                "`{}` must be encoded before `{}`",
                names[0],
                names[1]
            );
        }
    }

    #[test]
    fn citation_with_same_marker_is_stable_under_sort() {
        // Defensive: if two citations share a marker (malformed input
        // from the validator path), the sort must be stable so the
        // input order is preserved. Pinned because a different sort
        // strategy (unstable + tie on marker) would non-determinise
        // the response and break #400.
        let mut r = fixture();
        r.citations = vec![
            Citation {
                marker: 1,
                urn: "urn:first".into(),
            },
            Citation {
                marker: 1,
                urn: "urn:second".into(),
            },
        ];
        let v = build(&r);
        let arr = v.get("citations").and_then(|x| x.as_array()).unwrap();
        assert_eq!(
            arr[0].get("urn").and_then(|x| x.as_str()),
            Some("urn:first")
        );
        assert_eq!(
            arr[1].get("urn").and_then(|x| x.as_str()),
            Some("urn:second")
        );
    }
}