1use std::collections::BTreeMap;
73
74use sha2::{Digest, Sha256};
75
76use crate::json;
77use crate::runtime::ai::strict_validator::{Mode, ValidationError, ValidationErrorKind};
78use crate::serde_json::Value;
79
/// Switches that control which optional fields [`build`] emits.
///
/// The derived `Default` leaves every switch off.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct Settings {
    /// When `true`, [`build`] also stores the raw answer text under the
    /// `answer` key. The `answer_hash` key is emitted either way.
    pub include_answer: bool,
}
91
/// Borrowed snapshot of one call, consumed by [`build`].
///
/// Every string and slice field borrows from the caller for `'a`; the struct
/// owns none of its text or collections.
#[derive(Debug, Clone)]
pub struct CallState<'a> {
    /// Emitted as `ts`. Nanosecond resolution per the name; the epoch is not
    /// established in this file (presumably Unix time; TODO confirm).
    pub ts_nanos: i64,
    /// Emitted as `tenant`. `build` performs no validation, so an empty
    /// string is passed through unchanged.
    pub tenant: &'a str,
    /// Emitted as `user`; passed through unchanged.
    pub user: &'a str,
    /// Emitted as `role`; passed through unchanged.
    pub role: &'a str,
    /// Emitted as `question`.
    pub question: &'a str,
    /// Emitted as `sources_urns`, handed to `json!` as a slice.
    pub sources_urns: &'a [String],
    /// Emitted as `provider`.
    pub provider: &'a str,
    /// Emitted as `model`.
    pub model: &'a str,
    /// Emitted as `prompt_tokens`.
    pub prompt_tokens: i64,
    /// Emitted as `completion_tokens`.
    pub completion_tokens: i64,
    /// Emitted as `cost_usd` (presumably US dollars, per the name).
    pub cost_usd: f64,
    /// Answer text. Always hashed into `answer_hash`; the raw text is only
    /// emitted (as `answer`) when [`Settings::include_answer`] is set.
    pub answer: &'a str,
    /// Emitted as `citations`.
    /// NOTE(review): these look like source indices; confirm with the caller.
    pub citations: &'a [u32],
    /// Emitted as `cache_hit`.
    pub cache_hit: bool,
    /// Emitted as `mode`, rendered as `"strict"` or `"lenient"`.
    pub effective_mode: Mode,
    /// Emitted as `temperature`; `None` becomes JSON null.
    pub temperature: Option<f32>,
    /// Emitted as `seed`; `None` becomes JSON null.
    pub seed: Option<u64>,
    /// Emitted as `validation_ok`.
    pub validation_ok: bool,
    /// Emitted as `retry_count`.
    pub retry_count: u32,
    /// Emitted as `errors`: one `{kind, detail}` object per entry.
    pub errors: &'a [ValidationError],
}
125
126pub fn build(state: &CallState<'_>, settings: Settings) -> BTreeMap<&'static str, Value> {
134 let mut row: BTreeMap<&'static str, Value> = BTreeMap::new();
135
136 row.insert("ts", json!(state.ts_nanos));
137 row.insert("tenant", json!(state.tenant));
138 row.insert("user", json!(state.user));
139 row.insert("role", json!(state.role));
140 row.insert("question", json!(state.question));
141 row.insert("sources_urns", json!(state.sources_urns));
142 row.insert("provider", json!(state.provider));
143 row.insert("model", json!(state.model));
144 row.insert("prompt_tokens", json!(state.prompt_tokens));
145 row.insert("completion_tokens", json!(state.completion_tokens));
146 row.insert("cost_usd", json!(state.cost_usd));
147 row.insert("answer_hash", json!(answer_hash(state.answer)));
148 row.insert("citations", json!(state.citations));
149 row.insert("cache_hit", json!(state.cache_hit));
150 row.insert("mode", json!(mode_str(state.effective_mode)));
151 row.insert(
152 "temperature",
153 state
154 .temperature
155 .map(|value| json!(value))
156 .unwrap_or(Value::Null),
157 );
158 row.insert(
159 "seed",
160 state.seed.map(|value| json!(value)).unwrap_or(Value::Null),
161 );
162 row.insert("validation_ok", json!(state.validation_ok));
163 row.insert("retry_count", json!(state.retry_count));
164 row.insert(
165 "errors",
166 Value::Array(state.errors.iter().map(error_json).collect()),
167 );
168
169 if settings.include_answer {
170 row.insert("answer", json!(state.answer));
171 }
172
173 row
174}
175
176pub fn answer_hash(answer: &str) -> String {
180 let mut hasher = Sha256::new();
181 hasher.update(answer.as_bytes());
182 let bytes = hasher.finalize();
183 let mut out = String::with_capacity(bytes.len() * 2);
184 for b in bytes {
185 out.push_str(&format!("{b:02x}"));
186 }
187 out
188}
189
190fn mode_str(mode: Mode) -> &'static str {
191 match mode {
192 Mode::Strict => "strict",
193 Mode::Lenient => "lenient",
194 }
195}
196
197fn error_kind_str(kind: ValidationErrorKind) -> &'static str {
198 match kind {
199 ValidationErrorKind::Malformed => "malformed",
200 ValidationErrorKind::OutOfRange => "out_of_range",
201 }
202}
203
204fn error_json(err: &ValidationError) -> Value {
205 json!({
206 "kind": error_kind_str(err.kind),
207 "detail": err.detail,
208 })
209}
210
#[cfg(test)]
mod tests {
    use super::*;

    /// Keys that `build` must emit regardless of `Settings`.
    const REQUIRED_FIELDS: &[&str] = &[
        "ts",
        "tenant",
        "user",
        "role",
        "question",
        "sources_urns",
        "provider",
        "model",
        "prompt_tokens",
        "completion_tokens",
        "cost_usd",
        "answer_hash",
        "citations",
        "cache_hit",
        "mode",
        "temperature",
        "seed",
        "validation_ok",
        "retry_count",
        "errors",
    ];

    /// Owns the collections that a `CallState` only borrows, so each test
    /// needs a single binding instead of three separate vectors.
    #[derive(Default)]
    struct Inputs {
        urns: Vec<String>,
        citations: Vec<u32>,
        errors: Vec<ValidationError>,
    }

    impl Inputs {
        /// Fixture with the given source URNs and citations and no errors.
        fn with_sources(urns: &[&str], citations: &[u32]) -> Self {
            Self {
                urns: urns.iter().map(|urn| (*urn).to_owned()).collect(),
                citations: citations.to_vec(),
                errors: Vec::new(),
            }
        }

        /// Baseline state borrowing from this fixture; tests override
        /// individual fields with struct-update syntax.
        fn state<'a>(&'a self, question: &'a str, answer: &'a str) -> CallState<'a> {
            CallState {
                ts_nanos: 1_700_000_000_000_000_000,
                tenant: "acme",
                user: "alice",
                role: "analyst",
                question,
                sources_urns: &self.urns,
                provider: "openai",
                model: "gpt-4o-mini",
                prompt_tokens: 123,
                completion_tokens: 45,
                cost_usd: 0.0012,
                answer,
                citations: &self.citations,
                cache_hit: false,
                effective_mode: Mode::Strict,
                temperature: Some(0.0),
                seed: Some(42),
                validation_ok: true,
                retry_count: 0,
                errors: &self.errors,
            }
        }
    }

    /// Runs `build` with default settings (raw answer text excluded).
    fn default_row(state: &CallState<'_>) -> BTreeMap<&'static str, Value> {
        build(state, Settings::default())
    }

    // Pins the well-known SHA-256 digest of the empty input.
    #[test]
    fn answer_hash_is_deterministic_sha256() {
        let digest_of_empty = answer_hash("");
        assert_eq!(
            digest_of_empty,
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );
    }

    // Pins the digest of a short non-empty input.
    #[test]
    fn answer_hash_known_value_for_short_string() {
        let digest_of_hello = answer_hash("hello");
        assert_eq!(
            digest_of_hello,
            "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
        );
    }

    #[test]
    fn answer_hash_repeated_calls_byte_equal() {
        let input = "the cat sat on the mat";
        assert_eq!(answer_hash(input), answer_hash(input));
    }

    #[test]
    fn answer_hash_differs_for_differing_input() {
        let (left, right) = (answer_hash("a"), answer_hash("b"));
        assert_ne!(left, right);
    }

    // Every required key must be present under default settings.
    #[test]
    fn build_emits_every_required_field() {
        let inputs = Inputs::with_sources(&["urn:a", "urn:b"], &[1, 2]);
        let row = default_row(&inputs.state("q?", "answer text"));

        for &key in REQUIRED_FIELDS {
            assert!(row.contains_key(key), "row missing required field `{key}`");
        }
    }

    // Each emitted value must mirror the corresponding state field.
    #[test]
    fn build_field_values_match_state() {
        let inputs = Inputs::with_sources(&["urn:x"], &[3]);
        let row = default_row(&inputs.state("why?", "because"));

        let expected = [
            ("ts", json!(1_700_000_000_000_000_000_i64)),
            ("tenant", json!("acme")),
            ("user", json!("alice")),
            ("role", json!("analyst")),
            ("question", json!("why?")),
            ("sources_urns", json!(["urn:x"])),
            ("provider", json!("openai")),
            ("model", json!("gpt-4o-mini")),
            ("prompt_tokens", json!(123)),
            ("completion_tokens", json!(45)),
            ("cost_usd", json!(0.0012)),
            ("answer_hash", json!(answer_hash("because"))),
            ("citations", json!([3])),
            ("cache_hit", json!(false)),
            ("mode", json!("strict")),
            ("temperature", json!(0.0)),
            ("seed", json!(42u64)),
            ("validation_ok", json!(true)),
            ("retry_count", json!(0)),
            ("errors", json!([])),
        ];
        for (key, want) in expected {
            assert_eq!(row[key], want, "field `{key}`");
        }
    }

    // Missing temperature/seed must be stored as explicit nulls.
    #[test]
    fn unsupported_determinism_knobs_are_recorded_as_null() {
        let inputs = Inputs::default();
        let state = CallState {
            temperature: None,
            seed: None,
            ..inputs.state("q", "a")
        };

        let row = default_row(&state);

        assert_eq!(row["temperature"], Value::Null);
        assert_eq!(row["seed"], Value::Null);
    }

    // By default only the hash of the answer is recorded, never the text.
    #[test]
    fn answer_field_absent_by_default() {
        let inputs = Inputs::default();
        let row = default_row(&inputs.state("q", "secret answer"));

        assert!(!row.contains_key("answer"));
        assert_eq!(row["answer_hash"], json!(answer_hash("secret answer")));
    }

    // Opting in adds the raw text alongside the hash.
    #[test]
    fn answer_field_present_when_include_answer_set() {
        let inputs = Inputs::default();
        let settings = Settings {
            include_answer: true,
        };

        let row = build(&inputs.state("q", "full text"), settings);

        assert_eq!(row["answer"], json!("full text"));
        assert_eq!(row["answer_hash"], json!(answer_hash("full text")));
    }

    #[test]
    fn lenient_mode_serializes_as_lenient_string() {
        let inputs = Inputs::default();
        let state = CallState {
            effective_mode: Mode::Lenient,
            ..inputs.state("q", "a")
        };

        let row = default_row(&state);

        assert_eq!(row["mode"], json!("lenient"));
    }

    // Errors are emitted in order as `{kind, detail}` objects.
    #[test]
    fn errors_round_trip_with_kind_and_detail() {
        let inputs = Inputs {
            errors: vec![
                ValidationError {
                    kind: ValidationErrorKind::Malformed,
                    detail: "empty marker body".to_string(),
                },
                ValidationError {
                    kind: ValidationErrorKind::OutOfRange,
                    detail: "marker [^9] references source #9".to_string(),
                },
            ],
            ..Inputs::default()
        };
        let state = CallState {
            validation_ok: false,
            retry_count: 1,
            ..inputs.state("q", "a")
        };

        let row = default_row(&state);

        assert_eq!(row["validation_ok"], json!(false));
        assert_eq!(row["retry_count"], json!(1));
        assert_eq!(
            row["errors"],
            json!([
                json!({"kind": "malformed", "detail": "empty marker body"}),
                json!({"kind": "out_of_range", "detail": "marker [^9] references source #9"}),
            ])
        );
    }

    // A cache hit is recorded together with its zeroed usage figures.
    #[test]
    fn cache_hit_recorded() {
        let inputs = Inputs::default();
        let state = CallState {
            cache_hit: true,
            prompt_tokens: 0,
            completion_tokens: 0,
            cost_usd: 0.0,
            ..inputs.state("q", "cached")
        };

        let row = default_row(&state);

        assert_eq!(row["cache_hit"], json!(true));
        assert_eq!(row["cost_usd"], json!(0.0));
        assert_eq!(row["prompt_tokens"], json!(0));
    }

    // `build` does not reject or rewrite empty identity strings.
    #[test]
    fn empty_identity_fields_allowed() {
        let inputs = Inputs::default();
        let state = CallState {
            tenant: "",
            user: "",
            role: "",
            ..inputs.state("q", "a")
        };

        let row = default_row(&state);

        for key in ["tenant", "user", "role"] {
            assert_eq!(row[key], json!(""));
        }
    }

    #[test]
    fn empty_sources_serializes_as_empty_array() {
        let inputs = Inputs::default();
        let row = default_row(&inputs.state("q", "a"));

        for key in ["sources_urns", "citations", "errors"] {
            assert_eq!(row[key], json!([]));
        }
    }

    // Source URNs must come out in caller order, not sorted.
    #[test]
    fn sources_order_preserved() {
        let inputs = Inputs::with_sources(&["urn:c", "urn:a", "urn:b"], &[]);
        let row = default_row(&inputs.state("q", "a"));

        assert_eq!(row["sources_urns"], json!(["urn:c", "urn:a", "urn:b"]));
    }

    #[test]
    fn build_is_deterministic_across_calls() {
        let inputs = Inputs::with_sources(&["urn:a"], &[1]);
        let state = inputs.state("q", "a");

        let first = default_row(&state);
        let second = default_row(&state);
        assert_eq!(first, second);
    }
}