use std::collections::BTreeMap;
use sha2::{Digest, Sha256};
use crate::json;
use crate::runtime::ai::strict_validator::{Mode, ValidationError, ValidationErrorKind};
use crate::serde_json::Value;
/// Options controlling which optional entries [`build`] writes into a row.
///
/// The derived `Default` leaves every option switched off, so the plain
/// answer text is only recorded when a caller opts in explicitly.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Settings {
    /// When `true`, [`build`] adds an `"answer"` entry holding the answer text
    /// in addition to the always-present `"answer_hash"` entry.
    pub include_answer: bool,
}
/// Borrowed snapshot of a single call, consumed by [`build`] to produce a row.
///
/// Every string and slice field borrows from the caller for `'a`.
#[derive(Debug, Clone)]
pub struct CallState<'a> {
/// Timestamp written under the `"ts"` key.
/// NOTE(review): the name suggests nanoseconds, presumably since the Unix
/// epoch — confirm with the caller.
pub ts_nanos: i64,
/// Written under the `"tenant"` key; may be empty.
pub tenant: &'a str,
/// Written under the `"user"` key; may be empty.
pub user: &'a str,
/// Written under the `"role"` key; may be empty.
pub role: &'a str,
/// Written under the `"question"` key.
pub question: &'a str,
/// Written under the `"sources_urns"` key as an array, in slice order.
pub sources_urns: &'a [String],
/// Written under the `"provider"` key.
pub provider: &'a str,
/// Written under the `"model"` key.
pub model: &'a str,
/// Written under the `"prompt_tokens"` key.
pub prompt_tokens: i64,
/// Written under the `"completion_tokens"` key.
pub completion_tokens: i64,
/// Written under the `"cost_usd"` key.
pub cost_usd: f64,
/// Answer text. Its SHA-256 hex digest is always written under
/// `"answer_hash"`; the text itself is written under `"answer"` only when
/// `Settings::include_answer` is set.
pub answer: &'a str,
/// Written under the `"citations"` key as an array.
/// NOTE(review): presumably indices into `sources_urns` — confirm.
pub citations: &'a [u32],
/// Written under the `"cache_hit"` key.
pub cache_hit: bool,
/// Written under the `"mode"` key as `"strict"` or `"lenient"`.
pub effective_mode: Mode,
/// Written under the `"temperature"` key; `None` becomes JSON `null`.
pub temperature: Option<f32>,
/// Written under the `"seed"` key; `None` becomes JSON `null`.
pub seed: Option<u64>,
/// Written under the `"validation_ok"` key.
pub validation_ok: bool,
/// Written under the `"retry_count"` key.
pub retry_count: u32,
/// Written under the `"errors"` key as an array of `{kind, detail}` objects.
pub errors: &'a [ValidationError],
}
/// Assembles one row of call metadata from `state`, keyed by field name.
///
/// The row always contains the same twenty entries. The answer text is
/// represented by the hex digest from [`answer_hash`] under `"answer_hash"`;
/// the plain text is added under `"answer"` only when
/// `settings.include_answer` is `true`. An unset `temperature` or `seed` is
/// written as JSON `null` rather than being left out.
pub fn build(state: &CallState<'_>, settings: Settings) -> BTreeMap<&'static str, Value> {
    // Optional knobs stay present in the row; absence is encoded as `null`.
    let temperature = match state.temperature {
        Some(value) => json!(value),
        None => Value::Null,
    };
    let seed = match state.seed {
        Some(value) => json!(value),
        None => Value::Null,
    };
    let errors: Vec<Value> = state.errors.iter().map(error_json).collect();

    let mut row: BTreeMap<&'static str, Value> = BTreeMap::from([
        ("ts", json!(state.ts_nanos)),
        ("tenant", json!(state.tenant)),
        ("user", json!(state.user)),
        ("role", json!(state.role)),
        ("question", json!(state.question)),
        ("sources_urns", json!(state.sources_urns)),
        ("provider", json!(state.provider)),
        ("model", json!(state.model)),
        ("prompt_tokens", json!(state.prompt_tokens)),
        ("completion_tokens", json!(state.completion_tokens)),
        ("cost_usd", json!(state.cost_usd)),
        ("answer_hash", json!(answer_hash(state.answer))),
        ("citations", json!(state.citations)),
        ("cache_hit", json!(state.cache_hit)),
        ("mode", json!(mode_str(state.effective_mode))),
        ("temperature", temperature),
        ("seed", seed),
        ("validation_ok", json!(state.validation_ok)),
        ("retry_count", json!(state.retry_count)),
        ("errors", Value::Array(errors)),
    ]);

    // The plain answer is opt-in; by default only its digest is recorded.
    if settings.include_answer {
        row.insert("answer", json!(state.answer));
    }
    row
}
/// Returns the SHA-256 digest of `answer`'s UTF-8 bytes as lowercase hex.
///
/// The result has two hex characters per digest byte and is identical for
/// identical input.
pub fn answer_hash(answer: &str) -> String {
    // Lowercase digits, matching what `{:02x}` formatting produces.
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut hasher = Sha256::new();
    hasher.update(answer.as_bytes());
    let digest = hasher.finalize();

    // Push the two nibbles of each byte directly instead of going through
    // `format!`, which would allocate a temporary `String` per digest byte.
    let mut out = String::with_capacity(digest.len() * 2);
    for byte in digest {
        out.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        out.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
    }
    out
}
/// Maps a validation [`Mode`] to the lowercase label stored in the row's
/// `"mode"` entry.
///
/// The match is kept exhaustive so that adding a `Mode` variant fails to
/// compile here until a label is chosen for it.
fn mode_str(mode: Mode) -> &'static str {
    match mode {
        Mode::Lenient => "lenient",
        Mode::Strict => "strict",
    }
}
/// Maps a [`ValidationErrorKind`] to the snake_case label stored in the
/// `"kind"` field of each serialized error.
///
/// The match is kept exhaustive so that a new kind cannot be added without
/// giving it a label.
fn error_kind_str(kind: ValidationErrorKind) -> &'static str {
    match kind {
        ValidationErrorKind::OutOfRange => "out_of_range",
        ValidationErrorKind::Malformed => "malformed",
    }
}
fn error_json(err: &ValidationError) -> Value {
json!({
"kind": error_kind_str(err.kind),
"detail": err.detail,
})
}
#[cfg(test)]
mod tests {
    use super::*;

    /// SHA-256 of the empty string, as lowercase hex.
    const EMPTY_SHA256: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
    /// SHA-256 of `"hello"`, as lowercase hex.
    const HELLO_SHA256: &str = "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824";

    /// Returns a fully populated [`CallState`] around the borrowed inputs.
    ///
    /// Everything not passed in is a fixed value; tests override individual
    /// fields with struct-update syntax.
    fn base_state<'a>(
        question: &'a str,
        urns: &'a [String],
        answer: &'a str,
        citations: &'a [u32],
        errors: &'a [ValidationError],
    ) -> CallState<'a> {
        CallState {
            ts_nanos: 1_700_000_000_000_000_000,
            tenant: "acme",
            user: "alice",
            role: "analyst",
            question,
            sources_urns: urns,
            provider: "openai",
            model: "gpt-4o-mini",
            prompt_tokens: 123,
            completion_tokens: 45,
            cost_usd: 0.0012,
            answer,
            citations,
            cache_hit: false,
            effective_mode: Mode::Strict,
            temperature: Some(0.0),
            seed: Some(42),
            validation_ok: true,
            retry_count: 0,
            errors,
        }
    }

    /// Owned, empty source/citation/error lists for tests that do not care
    /// about them; the caller keeps them alive and lends them to `base_state`.
    fn no_inputs() -> (Vec<String>, Vec<u32>, Vec<ValidationError>) {
        (Vec::new(), Vec::new(), Vec::new())
    }

    /// Builds a row from `state` with default settings.
    fn default_row(state: &CallState<'_>) -> BTreeMap<&'static str, Value> {
        build(state, Settings::default())
    }

    #[test]
    fn answer_hash_is_deterministic_sha256() {
        assert_eq!(answer_hash(""), EMPTY_SHA256);
    }

    #[test]
    fn answer_hash_known_value_for_short_string() {
        assert_eq!(answer_hash("hello"), HELLO_SHA256);
    }

    #[test]
    fn answer_hash_repeated_calls_byte_equal() {
        let text = "the cat sat on the mat";
        let first = answer_hash(text);
        let second = answer_hash(text);
        assert_eq!(first, second);
    }

    #[test]
    fn answer_hash_differs_for_differing_input() {
        assert_ne!(answer_hash("a"), answer_hash("b"));
    }

    #[test]
    fn build_emits_every_required_field() {
        let required = [
            "ts",
            "tenant",
            "user",
            "role",
            "question",
            "sources_urns",
            "provider",
            "model",
            "prompt_tokens",
            "completion_tokens",
            "cost_usd",
            "answer_hash",
            "citations",
            "cache_hit",
            "mode",
            "temperature",
            "seed",
            "validation_ok",
            "retry_count",
            "errors",
        ];
        let urns = vec!["urn:a".to_string(), "urn:b".to_string()];
        let citations = vec![1u32, 2];
        let errors: Vec<ValidationError> = Vec::new();
        let row = default_row(&base_state("q?", &urns, "answer text", &citations, &errors));
        for key in required {
            assert!(row.contains_key(key), "row missing required field `{key}`");
        }
    }

    #[test]
    fn build_field_values_match_state() {
        let urns = vec!["urn:x".to_string()];
        let citations = vec![3u32];
        let errors: Vec<ValidationError> = Vec::new();
        let row = default_row(&base_state("why?", &urns, "because", &citations, &errors));

        // Identity and request.
        assert_eq!(row["ts"], json!(1_700_000_000_000_000_000_i64));
        assert_eq!(row["tenant"], json!("acme"));
        assert_eq!(row["user"], json!("alice"));
        assert_eq!(row["role"], json!("analyst"));
        assert_eq!(row["question"], json!("why?"));
        assert_eq!(row["sources_urns"], json!(["urn:x"]));

        // Provider and usage.
        assert_eq!(row["provider"], json!("openai"));
        assert_eq!(row["model"], json!("gpt-4o-mini"));
        assert_eq!(row["prompt_tokens"], json!(123));
        assert_eq!(row["completion_tokens"], json!(45));
        assert_eq!(row["cost_usd"], json!(0.0012));

        // Answer and outcome.
        assert_eq!(row["answer_hash"], json!(answer_hash("because")));
        assert_eq!(row["citations"], json!([3]));
        assert_eq!(row["cache_hit"], json!(false));
        assert_eq!(row["mode"], json!("strict"));
        assert_eq!(row["temperature"], json!(0.0));
        assert_eq!(row["seed"], json!(42u64));
        assert_eq!(row["validation_ok"], json!(true));
        assert_eq!(row["retry_count"], json!(0));
        assert_eq!(row["errors"], json!([]));
    }

    #[test]
    fn unsupported_determinism_knobs_are_recorded_as_null() {
        let (urns, citations, errors) = no_inputs();
        let state = CallState {
            temperature: None,
            seed: None,
            ..base_state("q", &urns, "a", &citations, &errors)
        };
        let row = default_row(&state);
        assert_eq!(row["temperature"], Value::Null);
        assert_eq!(row["seed"], Value::Null);
    }

    #[test]
    fn answer_field_absent_by_default() {
        let (urns, citations, errors) = no_inputs();
        let row = default_row(&base_state("q", &urns, "secret answer", &citations, &errors));
        assert!(!row.contains_key("answer"));
        assert_eq!(row["answer_hash"], json!(answer_hash("secret answer")));
    }

    #[test]
    fn answer_field_present_when_include_answer_set() {
        let (urns, citations, errors) = no_inputs();
        let state = base_state("q", &urns, "full text", &citations, &errors);
        let with_answer = Settings {
            include_answer: true,
        };
        let row = build(&state, with_answer);
        assert_eq!(row["answer"], json!("full text"));
        assert_eq!(row["answer_hash"], json!(answer_hash("full text")));
    }

    #[test]
    fn lenient_mode_serializes_as_lenient_string() {
        let (urns, citations, errors) = no_inputs();
        let state = CallState {
            effective_mode: Mode::Lenient,
            ..base_state("q", &urns, "a", &citations, &errors)
        };
        assert_eq!(default_row(&state)["mode"], json!("lenient"));
    }

    #[test]
    fn errors_round_trip_with_kind_and_detail() {
        let urns: Vec<String> = Vec::new();
        let citations: Vec<u32> = Vec::new();
        let errors = vec![
            ValidationError {
                kind: ValidationErrorKind::Malformed,
                detail: "empty marker body".to_string(),
            },
            ValidationError {
                kind: ValidationErrorKind::OutOfRange,
                detail: "marker [^9] references source #9".to_string(),
            },
        ];
        let state = CallState {
            validation_ok: false,
            retry_count: 1,
            ..base_state("q", &urns, "a", &citations, &errors)
        };
        let row = default_row(&state);
        assert_eq!(row["validation_ok"], json!(false));
        assert_eq!(row["retry_count"], json!(1));
        assert_eq!(
            row["errors"],
            json!([
                json!({"kind": "malformed", "detail": "empty marker body"}),
                json!({"kind": "out_of_range", "detail": "marker [^9] references source #9"}),
            ])
        );
    }

    #[test]
    fn cache_hit_recorded() {
        let (urns, citations, errors) = no_inputs();
        let state = CallState {
            cache_hit: true,
            prompt_tokens: 0,
            completion_tokens: 0,
            cost_usd: 0.0,
            ..base_state("q", &urns, "cached", &citations, &errors)
        };
        let row = default_row(&state);
        assert_eq!(row["cache_hit"], json!(true));
        assert_eq!(row["cost_usd"], json!(0.0));
        assert_eq!(row["prompt_tokens"], json!(0));
    }

    #[test]
    fn empty_identity_fields_allowed() {
        let (urns, citations, errors) = no_inputs();
        let state = CallState {
            tenant: "",
            user: "",
            role: "",
            ..base_state("q", &urns, "a", &citations, &errors)
        };
        let row = default_row(&state);
        assert_eq!(row["tenant"], json!(""));
        assert_eq!(row["user"], json!(""));
        assert_eq!(row["role"], json!(""));
    }

    #[test]
    fn empty_sources_serializes_as_empty_array() {
        let (urns, citations, errors) = no_inputs();
        let row = default_row(&base_state("q", &urns, "a", &citations, &errors));
        assert_eq!(row["sources_urns"], json!([]));
        assert_eq!(row["citations"], json!([]));
        assert_eq!(row["errors"], json!([]));
    }

    #[test]
    fn sources_order_preserved() {
        let urns: Vec<String> = ["urn:c", "urn:a", "urn:b"]
            .iter()
            .map(|urn| urn.to_string())
            .collect();
        let citations: Vec<u32> = Vec::new();
        let errors: Vec<ValidationError> = Vec::new();
        let row = default_row(&base_state("q", &urns, "a", &citations, &errors));
        assert_eq!(row["sources_urns"], json!(["urn:c", "urn:a", "urn:b"]));
    }

    #[test]
    fn build_is_deterministic_across_calls() {
        let urns = vec!["urn:a".to_string()];
        let citations = vec![1u32];
        let errors: Vec<ValidationError> = Vec::new();
        let state = base_state("q", &urns, "a", &citations, &errors);
        let first = default_row(&state);
        let second = default_row(&state);
        assert_eq!(first, second);
    }
}