use crate::runtime::ai::citation_parser::{
CitationParseResult, CitationWarning, CitationWarningKind,
};
/// Selects how [`validate`] reacts to citation warnings.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Mode {
    /// Malformed or out-of-range citation warnings lead to a retry or a
    /// give-up, depending on the attempt.
    Strict,
    /// Every parse result is accepted; warnings are not inspected at all.
    Lenient,
}
/// Identifies which attempt is being validated by [`validate`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Attempt {
    /// Under [`Mode::Strict`], structural warnings on this attempt make
    /// [`validate`] return [`Decision::Retry`].
    First,
    /// Under [`Mode::Strict`], structural warnings on this attempt make
    /// [`validate`] return [`Decision::GiveUp`].
    Retry,
}
/// A single citation problem reported through [`Decision::GiveUp`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ValidationError {
    /// Category of the problem, translated from the originating warning's kind.
    pub kind: ValidationErrorKind,
    /// Description text, cloned unchanged from the originating warning.
    pub detail: String,
}
/// Category of a [`ValidationError`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ValidationErrorKind {
    /// Produced from a `CitationWarningKind::Malformed` warning.
    Malformed,
    /// Produced from a `CitationWarningKind::OutOfRange` warning.
    OutOfRange,
}
/// Outcome returned by [`validate`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Decision {
    /// The parse result is accepted: either the mode is lenient or no
    /// malformed / out-of-range warnings were found.
    Ok,
    /// Strict mode found structural warnings on the first attempt.
    Retry {
        /// Corrective instructions followed by one line per warning.
        prompt: String,
    },
    /// Strict mode found structural warnings on the retry attempt.
    GiveUp {
        /// One entry per structural warning, in the warnings' original order.
        errors: Vec<ValidationError>,
    },
}
/// Decides what to do with a parsed answer based on its citation warnings.
///
/// * With [`Mode::Lenient`] the result is always [`Decision::Ok`]; `parsed`
///   and `attempt` are not consulted.
/// * With [`Mode::Strict`], warnings whose kind is `Malformed` or
///   `OutOfRange` are gathered. If there are none, the result is
///   [`Decision::Ok`]. Otherwise [`Attempt::First`] yields
///   [`Decision::Retry`] carrying a corrective prompt, and
///   [`Attempt::Retry`] yields [`Decision::GiveUp`] carrying one
///   [`ValidationError`] per gathered warning, in the original order.
pub fn validate(parsed: &CitationParseResult, mode: Mode, attempt: Attempt) -> Decision {
    // Lenient mode short-circuits before any warning is looked at.
    if matches!(mode, Mode::Lenient) {
        return Decision::Ok;
    }

    // Gather the warnings that count as structural problems, keeping order.
    let mut flagged: Vec<&CitationWarning> = Vec::new();
    for warning in parsed.warnings.iter() {
        let is_structural = matches!(
            warning.kind,
            CitationWarningKind::Malformed | CitationWarningKind::OutOfRange
        );
        if is_structural {
            flagged.push(warning);
        }
    }

    if flagged.is_empty() {
        return Decision::Ok;
    }

    match attempt {
        Attempt::First => {
            let prompt = build_retry_prompt(&flagged);
            Decision::Retry { prompt }
        }
        Attempt::Retry => {
            let mut errors = Vec::with_capacity(flagged.len());
            for warning in flagged.iter() {
                let kind = match warning.kind {
                    CitationWarningKind::Malformed => ValidationErrorKind::Malformed,
                    CitationWarningKind::OutOfRange => ValidationErrorKind::OutOfRange,
                };
                let detail = warning.detail.clone();
                errors.push(ValidationError { kind, detail });
            }
            Decision::GiveUp { errors }
        }
    }
}
/// Builds the corrective prompt carried by [`Decision::Retry`].
///
/// The prompt opens with a fixed instruction paragraph ending in
/// `Problems detected:` and a newline. Each warning then follows on its own
/// line as `- [<kind>] <detail>`, in the order given, where `<kind>` is
/// `malformed` or `out_of_range`.
///
/// An empty `warnings` slice yields just the instruction paragraph.
fn build_retry_prompt(warnings: &[&CitationWarning]) -> String {
    let mut prompt = String::from(
        "Your previous answer contained citation markers that do not match \
         the available sources. Reissue the answer in full, with every \
         `[^N]` marker referring to a real source by its 1-indexed position \
         in the provided context. Do not invent or renumber sources; if a \
         claim is not supported by a real source, drop the marker rather \
         than fabricate one. Problems detected:\n",
    );
    for warning in warnings {
        let label = match warning.kind {
            CitationWarningKind::Malformed => "malformed",
            CitationWarningKind::OutOfRange => "out_of_range",
        };
        // Append the pieces directly: `push_str(&format!(..))` would build and
        // discard a temporary `String` for every warning.
        prompt.push_str("- [");
        prompt.push_str(label);
        prompt.push_str("] ");
        prompt.push_str(&warning.detail);
        prompt.push('\n');
    }
    prompt
}
// Unit tests for `validate`, covering each mode/attempt combination and the
// contents of the retry prompt.
#[cfg(test)]
mod tests {
use super::*;
use crate::runtime::ai::citation_parser::{Citation, CitationParseResult, CitationWarning};
/// Fixture: one citation (marker 1, source index 0) and no warnings.
fn ok_result() -> CitationParseResult {
CitationParseResult {
citations: vec![Citation {
marker: 1,
span: 0..4,
source_index: 0,
}],
warnings: vec![],
}
}
/// Fixture: no citations and a single `Malformed` warning.
fn malformed_result() -> CitationParseResult {
CitationParseResult {
citations: vec![],
warnings: vec![CitationWarning {
kind: CitationWarningKind::Malformed,
span: 0..4,
detail: "empty marker body".to_string(),
}],
}
}
/// Fixture: one citation (marker 9) plus a single `OutOfRange` warning
/// covering the same span.
fn out_of_range_result() -> CitationParseResult {
CitationParseResult {
citations: vec![Citation {
marker: 9,
span: 0..4,
source_index: 8,
}],
warnings: vec![CitationWarning {
kind: CitationWarningKind::OutOfRange,
span: 0..4,
detail: "marker [^9] references source #9 but only 2 sources available".to_string(),
}],
}
}
/// Fixture: no citations, with a `Malformed` warning followed by an
/// `OutOfRange` warning. The order matters to the tests that index into
/// the resulting errors.
fn mixed_result() -> CitationParseResult {
CitationParseResult {
citations: vec![],
warnings: vec![
CitationWarning {
kind: CitationWarningKind::Malformed,
span: 0..3,
detail: "empty".into(),
},
CitationWarning {
kind: CitationWarningKind::OutOfRange,
span: 4..8,
detail: "marker [^7] references source #7 but only 1 sources available"
.to_string(),
},
],
}
}
/// Strict mode accepts a warning-free result on the first attempt.
#[test]
fn strict_clean_is_ok_on_first() {
assert_eq!(
validate(&ok_result(), Mode::Strict, Attempt::First),
Decision::Ok
);
}
/// Strict mode accepts a warning-free result on the retry attempt as well.
#[test]
fn strict_clean_is_ok_on_retry_too() {
assert_eq!(
validate(&ok_result(), Mode::Strict, Attempt::Retry),
Decision::Ok
);
}
/// A malformed warning on the first strict attempt produces a retry prompt
/// that names the kind and includes the warning detail.
#[test]
fn strict_malformed_first_attempt_asks_for_retry() {
let decision = validate(&malformed_result(), Mode::Strict, Attempt::First);
match decision {
Decision::Retry { prompt } => {
assert!(prompt.contains("Reissue the answer"));
assert!(prompt.contains("malformed"));
assert!(prompt.contains("empty marker body"));
}
other => panic!("expected Retry, got {other:?}"),
}
}
/// An out-of-range warning on the first strict attempt produces a retry
/// prompt that names the kind, includes the detail, and keeps the
/// "Do not invent" instruction.
#[test]
fn strict_out_of_range_first_attempt_asks_for_retry() {
let decision = validate(&out_of_range_result(), Mode::Strict, Attempt::First);
match decision {
Decision::Retry { prompt } => {
assert!(prompt.contains("out_of_range"));
assert!(prompt.contains("source #9"));
assert!(prompt.contains("Do not invent"));
}
other => panic!("expected Retry, got {other:?}"),
}
}
/// A malformed warning on the strict retry attempt gives up with a single
/// error carrying the same kind and detail.
#[test]
fn strict_malformed_retry_attempt_gives_up() {
let decision = validate(&malformed_result(), Mode::Strict, Attempt::Retry);
match decision {
Decision::GiveUp { errors } => {
assert_eq!(errors.len(), 1);
assert_eq!(errors[0].kind, ValidationErrorKind::Malformed);
assert_eq!(errors[0].detail, "empty marker body");
}
other => panic!("expected GiveUp, got {other:?}"),
}
}
/// An out-of-range warning on the strict retry attempt gives up with a
/// single `OutOfRange` error.
#[test]
fn strict_out_of_range_retry_attempt_gives_up() {
let decision = validate(&out_of_range_result(), Mode::Strict, Attempt::Retry);
match decision {
Decision::GiveUp { errors } => {
assert_eq!(errors.len(), 1);
assert_eq!(errors[0].kind, ValidationErrorKind::OutOfRange);
assert!(errors[0].detail.contains("source #9"));
}
other => panic!("expected GiveUp, got {other:?}"),
}
}
/// With two warnings of different kinds, the give-up errors preserve both
/// entries in the warnings' original order.
#[test]
fn strict_mixed_warnings_carry_through_to_giveup() {
let decision = validate(&mixed_result(), Mode::Strict, Attempt::Retry);
match decision {
Decision::GiveUp { errors } => {
assert_eq!(errors.len(), 2);
assert_eq!(errors[0].kind, ValidationErrorKind::Malformed);
assert_eq!(errors[1].kind, ValidationErrorKind::OutOfRange);
}
other => panic!("expected GiveUp, got {other:?}"),
}
}
/// Multiple warnings on the first strict attempt still result in a retry.
#[test]
fn strict_mixed_warnings_first_attempt_still_retries() {
let decision = validate(&mixed_result(), Mode::Strict, Attempt::First);
assert!(matches!(decision, Decision::Retry { .. }));
}
/// Lenient mode accepts a warning-free result.
#[test]
fn lenient_passes_clean() {
assert_eq!(
validate(&ok_result(), Mode::Lenient, Attempt::First),
Decision::Ok
);
}
/// Lenient mode accepts a result carrying a malformed warning.
#[test]
fn lenient_passes_malformed() {
assert_eq!(
validate(&malformed_result(), Mode::Lenient, Attempt::First),
Decision::Ok
);
}
/// Lenient mode accepts a result carrying an out-of-range warning.
#[test]
fn lenient_passes_out_of_range() {
assert_eq!(
validate(&out_of_range_result(), Mode::Lenient, Attempt::First),
Decision::Ok
);
}
/// Lenient mode returns `Ok` on the retry attempt too, even with warnings.
#[test]
fn lenient_ignores_attempt() {
assert_eq!(
validate(&malformed_result(), Mode::Lenient, Attempt::Retry),
Decision::Ok
);
}
/// Every warning's detail text appears somewhere in the retry prompt.
#[test]
fn retry_prompt_includes_every_warning_detail() {
let parsed = mixed_result();
let decision = validate(&parsed, Mode::Strict, Attempt::First);
let Decision::Retry { prompt } = decision else {
panic!("expected Retry");
};
for w in &parsed.warnings {
assert!(
prompt.contains(&w.detail),
"retry prompt missing detail `{}`, got:\n{prompt}",
w.detail
);
}
}
/// Validating the same input twice yields equal decisions, prompt included.
#[test]
fn retry_prompt_is_deterministic() {
let parsed = mixed_result();
let a = validate(&parsed, Mode::Strict, Attempt::First);
let b = validate(&parsed, Mode::Strict, Attempt::First);
assert_eq!(a, b);
}
/// The retry prompt keeps the "Do not invent" instruction.
#[test]
fn retry_prompt_forbids_fabrication() {
let decision = validate(&out_of_range_result(), Mode::Strict, Attempt::First);
let Decision::Retry { prompt } = decision else {
panic!("expected Retry");
};
assert!(prompt.contains("Do not invent"));
}
/// A default-constructed parse result is accepted in strict mode on both
/// attempts and in lenient mode on the first attempt.
#[test]
fn empty_parse_is_ok_in_either_mode() {
let empty = CitationParseResult::default();
assert_eq!(validate(&empty, Mode::Strict, Attempt::First), Decision::Ok);
assert_eq!(validate(&empty, Mode::Strict, Attempt::Retry), Decision::Ok);
assert_eq!(
validate(&empty, Mode::Lenient, Attempt::First),
Decision::Ok
);
}
/// Several citations with an empty warnings list are accepted in strict mode.
#[test]
fn citations_without_warnings_are_ok() {
let parsed = CitationParseResult {
citations: vec![
Citation {
marker: 1,
span: 0..4,
source_index: 0,
},
Citation {
marker: 2,
span: 5..9,
source_index: 1,
},
Citation {
marker: 3,
span: 10..14,
source_index: 2,
},
],
warnings: vec![],
};
assert_eq!(
validate(&parsed, Mode::Strict, Attempt::First),
Decision::Ok
);
}
}