Skip to main content

reddb_server/runtime/ai/
strict_validator.rs

1//! `StrictValidator` — pure citation validation policy.
2//!
3//! Issue #395 (PRD #391): after the `CitationParser` (issue #393)
4//! scans the LLM answer, this module decides what to do with the
5//! result given the requested mode and the current retry attempt.
6//!
7//! Deep module: no I/O, no transport, no LLM calls. Just an enum and
8//! one function. The caller is responsible for actually issuing the
9//! retry, mapping `GiveUp` to HTTP 422, etc.
10//!
11//! ## Policy
12//!
13//! Strict mode (the default per ADR 0013):
14//!
15//! - First call → if no malformed and no out-of-range, [`Decision::Ok`].
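//! - First call → otherwise, [`Decision::Retry`] with a corrective
//!   prompt that lists each detected problem and asks the LLM to
//!   reissue the answer in full, with every `[^N]` marker referring to
//!   a real source by its 1-indexed position. The prompt itself states
//!   no numeric range; range information reaches the LLM only through
//!   the warning details.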
19//! - Retry call → if still failing, [`Decision::GiveUp`] carrying the
20//!   structured errors that the HTTP layer should pack into the 422
21//!   response body under `validation.errors`.
22//!
23//! Exactly one retry is permitted. The validator tracks the retry
24//! budget via the [`Attempt`] argument — callers MUST pass
25//! [`Attempt::First`] on the initial call and [`Attempt::Retry`] on
26//! the single follow-up. There is no `Attempt::Retry2`; the type is
27//! the budget.
28//!
29//! Lenient mode ([`Mode::Lenient`], opt-in via `ASK '...' STRICT OFF`):
30//!
31//! - Always returns [`Decision::Ok`]. Warnings remain on the result
32//!   for the caller to surface, but the validator never asks for a
33//!   retry and never produces errors.
34//!
35//! ## Why a retry-prompt builder lives in here
36//!
37//! The retry message is part of the validator's contract — what the
38//! LLM is told on retry affects whether the second call is likely to
39//! succeed. Keeping prompt construction next to the decision logic
40//! lets the unit tests pin the exact phrasing, and keeps the
41//! `execute_ask` glue code tiny.
42
43use crate::runtime::ai::citation_parser::{
44    CitationParseResult, CitationWarning, CitationWarningKind,
45};
46
/// Validation strictness requested by the caller.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Mode {
    /// The default. A structural citation failure leads to a retry
    /// request; a failure on the retry is reported as a hard 422.
    Strict,
    /// Opt-in via `ASK '...' STRICT OFF`. Warnings are left for the
    /// caller to surface but never block the answer.
    Lenient,
}
56
/// Position of the current call within the one-retry budget.
///
/// The validator reads this to decide between asking for a retry and
/// giving up; there is deliberately no variant for a second retry.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Attempt {
    /// The initial LLM call.
    First,
    /// The single follow-up call issued after a retry was requested.
    Retry,
}
64
65/// Structured error returned in `validation.errors` on retry exhaust.
66///
67/// Mirrors the `CitationWarning` shape but reframed as an error
68/// (the warning was advisory on the first call; on retry exhaust it
69/// becomes the reason we couldn't deliver an answer).
70#[derive(Debug, Clone, PartialEq, Eq)]
71pub struct ValidationError {
72    pub kind: ValidationErrorKind,
73    pub detail: String,
74}
75
/// Category of a [`ValidationError`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ValidationErrorKind {
    /// The body of a `[^N]` marker was not a positive decimal terminated
    /// by `]`.
    Malformed,
    /// `N` fell outside `1..=sources_count`.
    OutOfRange,
}
83
84/// What the validator decided. The caller acts on this.
85#[derive(Debug, Clone, PartialEq, Eq)]
86pub enum Decision {
87    /// Citations parsed cleanly — emit the answer to the user.
88    Ok,
89    /// Strict + first attempt + structural failure. Caller should
90    /// issue exactly one follow-up LLM call with this prompt
91    /// prepended to (or substituted for) the synthesis prompt.
92    Retry { prompt: String },
93    /// Strict + retry attempt + still failing. Caller should respond
94    /// HTTP 422 with these errors in `validation.errors`.
95    GiveUp { errors: Vec<ValidationError> },
96}
97
98/// Pure validation step.
99///
100/// `sources_count` is the length of `sources_flat`; we don't re-derive
101/// out-of-range here because [`CitationParser`] already emitted the
102/// warning during parsing. We just decide what to *do* about it.
103pub fn validate(parsed: &CitationParseResult, mode: Mode, attempt: Attempt) -> Decision {
104    if mode == Mode::Lenient {
105        return Decision::Ok;
106    }
107
108    let structural_warnings: Vec<&CitationWarning> = parsed
109        .warnings
110        .iter()
111        .filter(|w| {
112            matches!(
113                w.kind,
114                CitationWarningKind::Malformed | CitationWarningKind::OutOfRange
115            )
116        })
117        .collect();
118
119    if structural_warnings.is_empty() {
120        return Decision::Ok;
121    }
122
123    match attempt {
124        Attempt::First => Decision::Retry {
125            prompt: build_retry_prompt(&structural_warnings),
126        },
127        Attempt::Retry => Decision::GiveUp {
128            errors: structural_warnings
129                .iter()
130                .map(|w| ValidationError {
131                    kind: match w.kind {
132                        CitationWarningKind::Malformed => ValidationErrorKind::Malformed,
133                        CitationWarningKind::OutOfRange => ValidationErrorKind::OutOfRange,
134                    },
135                    detail: w.detail.clone(),
136                })
137                .collect(),
138        },
139    }
140}
141
142/// Construct the prompt the caller should send on the single retry.
143///
144/// The phrasing is pinned by tests; it intentionally:
145///
146/// - states the valid range explicitly,
147/// - quotes the offending markers/details so the LLM sees its own
148///   mistake,
149/// - forbids inventing sources,
150/// - asks for the answer to be re-emitted in full (we don't try to
151///   patch the prior answer in place).
152fn build_retry_prompt(warnings: &[&CitationWarning]) -> String {
153    let mut out = String::from(
154        "Your previous answer contained citation markers that do not match \
155         the available sources. Reissue the answer in full, with every \
156         `[^N]` marker referring to a real source by its 1-indexed position \
157         in the provided context. Do not invent or renumber sources; if a \
158         claim is not supported by a real source, drop the marker rather \
159         than fabricate one. Problems detected:\n",
160    );
161    for w in warnings {
162        let kind = match w.kind {
163            CitationWarningKind::Malformed => "malformed",
164            CitationWarningKind::OutOfRange => "out_of_range",
165        };
166        out.push_str(&format!("- [{kind}] {}\n", w.detail));
167    }
168    out
169}
170
#[cfg(test)]
mod tests {
    //! Unit tests for the strict/lenient validation policy and for the
    //! retry-prompt contract.

    use super::*;
    use crate::runtime::ai::citation_parser::{Citation, CitationParseResult, CitationWarning};

    // ---- Fixtures -----------------------------------------------------

    /// One valid citation, no warnings.
    fn ok_result() -> CitationParseResult {
        CitationParseResult {
            citations: vec![Citation {
                marker: 1,
                span: 0..4,
                source_index: 0,
            }],
            warnings: vec![],
        }
    }

    /// A single `Malformed` warning, no citations.
    fn malformed_result() -> CitationParseResult {
        CitationParseResult {
            citations: vec![],
            warnings: vec![CitationWarning {
                kind: CitationWarningKind::Malformed,
                span: 0..4,
                detail: "empty marker body".to_string(),
            }],
        }
    }

    /// A citation to source #9 together with its `OutOfRange` warning.
    fn out_of_range_result() -> CitationParseResult {
        CitationParseResult {
            citations: vec![Citation {
                marker: 9,
                span: 0..4,
                source_index: 8,
            }],
            warnings: vec![CitationWarning {
                kind: CitationWarningKind::OutOfRange,
                span: 0..4,
                detail: "marker [^9] references source #9 but only 2 sources available".to_string(),
            }],
        }
    }

    /// One `Malformed` warning followed by one `OutOfRange` warning.
    fn mixed_result() -> CitationParseResult {
        CitationParseResult {
            citations: vec![],
            warnings: vec![
                CitationWarning {
                    kind: CitationWarningKind::Malformed,
                    span: 0..3,
                    detail: "empty".into(),
                },
                CitationWarning {
                    kind: CitationWarningKind::OutOfRange,
                    span: 4..8,
                    detail: "marker [^7] references source #7 but only 1 sources available"
                        .to_string(),
                },
            ],
        }
    }

    // ---- Strict mode --------------------------------------------------

    #[test]
    fn strict_clean_is_ok_on_first() {
        assert_eq!(
            validate(&ok_result(), Mode::Strict, Attempt::First),
            Decision::Ok
        );
    }

    #[test]
    fn strict_clean_is_ok_on_retry_too() {
        // The retry call also produced clean output — that's the
        // success path for "first call failed, retry succeeded".
        assert_eq!(
            validate(&ok_result(), Mode::Strict, Attempt::Retry),
            Decision::Ok
        );
    }

    #[test]
    fn strict_malformed_first_attempt_asks_for_retry() {
        let decision = validate(&malformed_result(), Mode::Strict, Attempt::First);
        match decision {
            Decision::Retry { prompt } => {
                assert!(prompt.contains("Reissue the answer"));
                assert!(prompt.contains("malformed"));
                assert!(prompt.contains("empty marker body"));
            }
            other => panic!("expected Retry, got {other:?}"),
        }
    }

    #[test]
    fn strict_out_of_range_first_attempt_asks_for_retry() {
        let decision = validate(&out_of_range_result(), Mode::Strict, Attempt::First);
        match decision {
            Decision::Retry { prompt } => {
                assert!(prompt.contains("out_of_range"));
                assert!(prompt.contains("source #9"));
                // No-fabrication clause is part of the contract.
                assert!(prompt.contains("Do not invent"));
            }
            other => panic!("expected Retry, got {other:?}"),
        }
    }

    #[test]
    fn strict_malformed_retry_attempt_gives_up() {
        let decision = validate(&malformed_result(), Mode::Strict, Attempt::Retry);
        match decision {
            Decision::GiveUp { errors } => {
                assert_eq!(errors.len(), 1);
                assert_eq!(errors[0].kind, ValidationErrorKind::Malformed);
                assert_eq!(errors[0].detail, "empty marker body");
            }
            other => panic!("expected GiveUp, got {other:?}"),
        }
    }

    #[test]
    fn strict_out_of_range_retry_attempt_gives_up() {
        let decision = validate(&out_of_range_result(), Mode::Strict, Attempt::Retry);
        match decision {
            Decision::GiveUp { errors } => {
                assert_eq!(errors.len(), 1);
                assert_eq!(errors[0].kind, ValidationErrorKind::OutOfRange);
                assert!(errors[0].detail.contains("source #9"));
            }
            other => panic!("expected GiveUp, got {other:?}"),
        }
    }

    #[test]
    fn strict_mixed_warnings_carry_through_to_giveup() {
        let decision = validate(&mixed_result(), Mode::Strict, Attempt::Retry);
        match decision {
            Decision::GiveUp { errors } => {
                assert_eq!(errors.len(), 2);
                assert_eq!(errors[0].kind, ValidationErrorKind::Malformed);
                assert_eq!(errors[1].kind, ValidationErrorKind::OutOfRange);
            }
            other => panic!("expected GiveUp, got {other:?}"),
        }
    }

    #[test]
    fn strict_mixed_giveup_preserves_details_in_order() {
        // Each error must carry the detail of the warning it came from,
        // in the same order as `parsed.warnings`.
        let parsed = mixed_result();
        let Decision::GiveUp { errors } = validate(&parsed, Mode::Strict, Attempt::Retry) else {
            panic!("expected GiveUp");
        };
        assert_eq!(errors.len(), parsed.warnings.len());
        for (error, warning) in errors.iter().zip(&parsed.warnings) {
            assert_eq!(error.detail, warning.detail);
        }
    }

    #[test]
    fn strict_mixed_warnings_first_attempt_still_retries() {
        let decision = validate(&mixed_result(), Mode::Strict, Attempt::First);
        assert!(matches!(decision, Decision::Retry { .. }));
    }

    // ---- Lenient mode -------------------------------------------------

    #[test]
    fn lenient_passes_clean() {
        assert_eq!(
            validate(&ok_result(), Mode::Lenient, Attempt::First),
            Decision::Ok
        );
    }

    #[test]
    fn lenient_passes_malformed() {
        // Warnings are still on `parsed.warnings`; the validator just
        // refuses to act on them in lenient mode.
        assert_eq!(
            validate(&malformed_result(), Mode::Lenient, Attempt::First),
            Decision::Ok
        );
    }

    #[test]
    fn lenient_passes_out_of_range() {
        assert_eq!(
            validate(&out_of_range_result(), Mode::Lenient, Attempt::First),
            Decision::Ok
        );
    }

    #[test]
    fn lenient_ignores_attempt() {
        // Retry-budget tracking is a strict-mode concern. In lenient
        // mode the validator behaves identically regardless of attempt.
        assert_eq!(
            validate(&malformed_result(), Mode::Lenient, Attempt::Retry),
            Decision::Ok
        );
    }

    // ---- Retry-prompt contract ---------------------------------------

    #[test]
    fn retry_prompt_includes_every_warning_detail() {
        let parsed = mixed_result();
        let decision = validate(&parsed, Mode::Strict, Attempt::First);
        let Decision::Retry { prompt } = decision else {
            panic!("expected Retry");
        };
        for w in &parsed.warnings {
            assert!(
                prompt.contains(&w.detail),
                "retry prompt missing detail `{}`, got:\n{prompt}",
                w.detail
            );
        }
    }

    #[test]
    fn retry_prompt_pins_exact_phrasing() {
        // The wording sent to the LLM is part of the validator's
        // contract, so pin the whole prompt: preamble, one line per
        // warning, warning order, and the trailing newline.
        let decision = validate(&mixed_result(), Mode::Strict, Attempt::First);
        let Decision::Retry { prompt } = decision else {
            panic!("expected Retry");
        };
        let expected = concat!(
            "Your previous answer contained citation markers that do not match ",
            "the available sources. Reissue the answer in full, with every ",
            "`[^N]` marker referring to a real source by its 1-indexed position ",
            "in the provided context. Do not invent or renumber sources; if a ",
            "claim is not supported by a real source, drop the marker rather ",
            "than fabricate one. Problems detected:\n",
            "- [malformed] empty\n",
            "- [out_of_range] marker [^7] references source #7 but only 1 sources available\n",
        );
        assert_eq!(prompt, expected);
    }

    #[test]
    fn retry_prompt_is_deterministic() {
        // Two validations of the same input must produce byte-equal
        // retry prompts — required for the ASK determinism contract
        // (#400). Sources of side effects (e.g. timestamps, RNG) must
        // never leak into the prompt builder.
        let parsed = mixed_result();
        let a = validate(&parsed, Mode::Strict, Attempt::First);
        let b = validate(&parsed, Mode::Strict, Attempt::First);
        assert_eq!(a, b);
    }

    #[test]
    fn retry_prompt_forbids_fabrication() {
        let decision = validate(&out_of_range_result(), Mode::Strict, Attempt::First);
        let Decision::Retry { prompt } = decision else {
            panic!("expected Retry");
        };
        // Anti-hallucination guard — the LLM must not "fix" the
        // citation by inventing a new source.
        assert!(prompt.contains("Do not invent"));
    }

    // ---- Boundary cases ----------------------------------------------

    #[test]
    fn empty_parse_is_ok_in_either_mode() {
        let empty = CitationParseResult::default();
        assert_eq!(validate(&empty, Mode::Strict, Attempt::First), Decision::Ok);
        assert_eq!(validate(&empty, Mode::Strict, Attempt::Retry), Decision::Ok);
        assert_eq!(
            validate(&empty, Mode::Lenient, Attempt::First),
            Decision::Ok
        );
    }

    #[test]
    fn citations_without_warnings_are_ok() {
        // Many successful citations, no warnings — the success path.
        let parsed = CitationParseResult {
            citations: vec![
                Citation {
                    marker: 1,
                    span: 0..4,
                    source_index: 0,
                },
                Citation {
                    marker: 2,
                    span: 5..9,
                    source_index: 1,
                },
                Citation {
                    marker: 3,
                    span: 10..14,
                    source_index: 2,
                },
            ],
            warnings: vec![],
        };
        assert_eq!(
            validate(&parsed, Mode::Strict, Attempt::First),
            Decision::Ok
        );
    }
}