1use std::collections::BTreeMap;
73
74use sha2::{Digest, Sha256};
75
76use crate::json;
77use crate::runtime::ai::strict_validator::{Mode, ValidationError, ValidationErrorKind};
78use crate::serde_json::Value;
79
/// Options controlling which optional fields [`build`] adds to a row.
///
/// The derived [`Default`] leaves every flag off, so `Settings::default()`
/// yields `include_answer: false`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Settings {
    /// When `true`, the row also carries the raw answer text under the
    /// `answer` key. When `false`, only the `answer_hash` digest is recorded.
    pub include_answer: bool,
}
99
/// Borrowed snapshot of one call, consumed by [`build`] to produce a row.
///
/// All string and slice fields borrow from the caller for `'a`; nothing is
/// copied until `build` converts the fields into JSON values.
#[derive(Debug, Clone)]
pub struct CallState<'a> {
    /// Timestamp of the call, emitted under the `ts` key.
    /// NOTE(review): the name suggests nanoseconds since the Unix epoch — confirm.
    pub ts_nanos: i64,
    /// Tenant identifier, emitted under `tenant`. May be empty.
    pub tenant: &'a str,
    /// User identifier, emitted under `user`. May be empty.
    pub user: &'a str,
    /// Role of the caller, emitted under `role`. May be empty.
    pub role: &'a str,
    /// Question text, emitted verbatim under `question`.
    pub question: &'a str,
    /// Source URNs, emitted as a JSON array in the order given.
    pub sources_urns: &'a [String],
    /// Provider name, emitted under `provider`.
    pub provider: &'a str,
    /// Model name, emitted under `model`.
    pub model: &'a str,
    /// Prompt token count, emitted under `prompt_tokens`.
    pub prompt_tokens: i64,
    /// Completion token count, emitted under `completion_tokens`.
    pub completion_tokens: i64,
    /// Cost of the call, emitted under `cost_usd`.
    pub cost_usd: f64,
    /// Answer text. Its SHA-256 hex digest is always emitted as `answer_hash`;
    /// the text itself is emitted as `answer` only when
    /// `Settings::include_answer` is set.
    pub answer: &'a str,
    /// Citation numbers, emitted as a JSON array under `citations`.
    /// NOTE(review): presumably indexes into the sources — confirm against caller.
    pub citations: &'a [u32],
    /// Cache-hit flag, emitted under `cache_hit`.
    pub cache_hit: bool,
    /// Validation mode, emitted under `mode` as `"strict"` or `"lenient"`.
    pub effective_mode: Mode,
    /// Sampling temperature; `None` is emitted as JSON `null`.
    pub temperature: Option<f32>,
    /// Sampling seed; `None` is emitted as JSON `null`.
    pub seed: Option<u64>,
    /// Validation outcome flag, emitted under `validation_ok`.
    pub validation_ok: bool,
    /// Retry counter, emitted under `retry_count`.
    pub retry_count: u32,
    /// Validation errors, each emitted as a `{kind, detail}` object under `errors`.
    pub errors: &'a [ValidationError],
}
133
134pub fn build(state: &CallState<'_>, settings: Settings) -> BTreeMap<&'static str, Value> {
142 let mut row: BTreeMap<&'static str, Value> = BTreeMap::new();
143
144 row.insert("ts", json!(state.ts_nanos));
145 row.insert("tenant", json!(state.tenant));
146 row.insert("user", json!(state.user));
147 row.insert("role", json!(state.role));
148 row.insert("question", json!(state.question));
149 row.insert("sources_urns", json!(state.sources_urns));
150 row.insert("provider", json!(state.provider));
151 row.insert("model", json!(state.model));
152 row.insert("prompt_tokens", json!(state.prompt_tokens));
153 row.insert("completion_tokens", json!(state.completion_tokens));
154 row.insert("cost_usd", json!(state.cost_usd));
155 row.insert("answer_hash", json!(answer_hash(state.answer)));
156 row.insert("citations", json!(state.citations));
157 row.insert("cache_hit", json!(state.cache_hit));
158 row.insert("mode", json!(mode_str(state.effective_mode)));
159 row.insert(
160 "temperature",
161 state
162 .temperature
163 .map(|value| json!(value))
164 .unwrap_or(Value::Null),
165 );
166 row.insert(
167 "seed",
168 state.seed.map(|value| json!(value)).unwrap_or(Value::Null),
169 );
170 row.insert("validation_ok", json!(state.validation_ok));
171 row.insert("retry_count", json!(state.retry_count));
172 row.insert(
173 "errors",
174 Value::Array(state.errors.iter().map(error_json).collect()),
175 );
176
177 if settings.include_answer {
178 row.insert("answer", json!(state.answer));
179 }
180
181 row
182}
183
184pub fn answer_hash(answer: &str) -> String {
188 let mut hasher = Sha256::new();
189 hasher.update(answer.as_bytes());
190 let bytes = hasher.finalize();
191 let mut out = String::with_capacity(bytes.len() * 2);
192 for b in bytes {
193 out.push_str(&format!("{b:02x}"));
194 }
195 out
196}
197
198fn mode_str(mode: Mode) -> &'static str {
199 match mode {
200 Mode::Strict => "strict",
201 Mode::Lenient => "lenient",
202 }
203}
204
205fn error_kind_str(kind: ValidationErrorKind) -> &'static str {
206 match kind {
207 ValidationErrorKind::Malformed => "malformed",
208 ValidationErrorKind::OutOfRange => "out_of_range",
209 }
210}
211
212fn error_json(err: &ValidationError) -> Value {
213 json!({
214 "kind": error_kind_str(err.kind),
215 "detail": err.detail,
216 })
217}
218
// Unit tests for `answer_hash` and `build`.
#[cfg(test)]
mod tests {
    use super::*;

    // Build a `CallState` with fixed, representative values for every field
    // except the five passed in, so each test only spells out what it varies.
    // Tests that need other values mutate the returned state.
    fn base_state<'a>(
        question: &'a str,
        urns: &'a [String],
        answer: &'a str,
        citations: &'a [u32],
        errors: &'a [ValidationError],
    ) -> CallState<'a> {
        CallState {
            ts_nanos: 1_700_000_000_000_000_000,
            tenant: "acme",
            user: "alice",
            role: "analyst",
            question,
            sources_urns: urns,
            provider: "openai",
            model: "gpt-4o-mini",
            prompt_tokens: 123,
            completion_tokens: 45,
            cost_usd: 0.0012,
            answer,
            citations,
            cache_hit: false,
            effective_mode: Mode::Strict,
            temperature: Some(0.0),
            seed: Some(42),
            validation_ok: true,
            retry_count: 0,
            errors,
        }
    }

    // The empty string must hash to the well-known SHA-256 digest of empty
    // input, rendered as lowercase hex.
    #[test]
    fn answer_hash_is_deterministic_sha256() {
        assert_eq!(
            answer_hash(""),
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );
    }

    // Pins the digest of a short non-empty input to a known value.
    #[test]
    fn answer_hash_known_value_for_short_string() {
        assert_eq!(
            answer_hash("hello"),
            "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
        );
    }

    // Hashing the same input twice yields the same string.
    #[test]
    fn answer_hash_repeated_calls_byte_equal() {
        let a = answer_hash("the cat sat on the mat");
        let b = answer_hash("the cat sat on the mat");
        assert_eq!(a, b);
    }

    // Different inputs must not collapse to the same digest.
    #[test]
    fn answer_hash_differs_for_differing_input() {
        assert_ne!(answer_hash("a"), answer_hash("b"));
    }

    // With default settings every always-on key is present in the row.
    // The opt-in `answer` key is intentionally not in this list.
    #[test]
    fn build_emits_every_required_field() {
        let urns = vec!["urn:a".to_string(), "urn:b".to_string()];
        let citations = vec![1u32, 2];
        let errors: Vec<ValidationError> = vec![];
        let state = base_state("q?", &urns, "answer text", &citations, &errors);

        let row = build(&state, Settings::default());

        for key in [
            "ts",
            "tenant",
            "user",
            "role",
            "question",
            "sources_urns",
            "provider",
            "model",
            "prompt_tokens",
            "completion_tokens",
            "cost_usd",
            "answer_hash",
            "citations",
            "cache_hit",
            "mode",
            "temperature",
            "seed",
            "validation_ok",
            "retry_count",
            "errors",
        ] {
            assert!(row.contains_key(key), "row missing required field `{key}`");
        }
    }

    // Each emitted value equals the corresponding `CallState` field, with
    // `answer` represented by its hash and `effective_mode` by its label.
    #[test]
    fn build_field_values_match_state() {
        let urns = vec!["urn:x".to_string()];
        let citations = vec![3u32];
        let errors: Vec<ValidationError> = vec![];
        let state = base_state("why?", &urns, "because", &citations, &errors);

        let row = build(&state, Settings::default());

        assert_eq!(row["ts"], json!(1_700_000_000_000_000_000_i64));
        assert_eq!(row["tenant"], json!("acme"));
        assert_eq!(row["user"], json!("alice"));
        assert_eq!(row["role"], json!("analyst"));
        assert_eq!(row["question"], json!("why?"));
        assert_eq!(row["sources_urns"], json!(["urn:x"]));
        assert_eq!(row["provider"], json!("openai"));
        assert_eq!(row["model"], json!("gpt-4o-mini"));
        assert_eq!(row["prompt_tokens"], json!(123));
        assert_eq!(row["completion_tokens"], json!(45));
        assert_eq!(row["cost_usd"], json!(0.0012));
        assert_eq!(row["answer_hash"], json!(answer_hash("because")));
        assert_eq!(row["citations"], json!([3]));
        assert_eq!(row["cache_hit"], json!(false));
        assert_eq!(row["mode"], json!("strict"));
        assert_eq!(row["temperature"], json!(0.0));
        assert_eq!(row["seed"], json!(42u64));
        assert_eq!(row["validation_ok"], json!(true));
        assert_eq!(row["retry_count"], json!(0));
        assert_eq!(row["errors"], json!([]));
    }

    // `None` for temperature/seed is written as JSON null; the keys stay present.
    #[test]
    fn unsupported_determinism_knobs_are_recorded_as_null() {
        let urns: Vec<String> = vec![];
        let citations: Vec<u32> = vec![];
        let errors: Vec<ValidationError> = vec![];
        let mut state = base_state("q", &urns, "a", &citations, &errors);
        state.temperature = None;
        state.seed = None;

        let row = build(&state, Settings::default());

        assert_eq!(row["temperature"], Value::Null);
        assert_eq!(row["seed"], Value::Null);
    }

    // Default settings must not leak the answer text: only the hash appears.
    #[test]
    fn answer_field_absent_by_default() {
        let urns: Vec<String> = vec![];
        let citations: Vec<u32> = vec![];
        let errors: Vec<ValidationError> = vec![];
        let state = base_state("q", &urns, "secret answer", &citations, &errors);

        let row = build(&state, Settings::default());

        assert!(!row.contains_key("answer"));
        assert_eq!(row["answer_hash"], json!(answer_hash("secret answer")));
    }

    // Opting in adds the raw answer while still recording its hash.
    #[test]
    fn answer_field_present_when_include_answer_set() {
        let urns: Vec<String> = vec![];
        let citations: Vec<u32> = vec![];
        let errors: Vec<ValidationError> = vec![];
        let state = base_state("q", &urns, "full text", &citations, &errors);

        let row = build(
            &state,
            Settings {
                include_answer: true,
            },
        );

        assert_eq!(row["answer"], json!("full text"));
        assert_eq!(row["answer_hash"], json!(answer_hash("full text")));
    }

    // `Mode::Lenient` is emitted as the string "lenient".
    #[test]
    fn lenient_mode_serializes_as_lenient_string() {
        let urns: Vec<String> = vec![];
        let citations: Vec<u32> = vec![];
        let errors: Vec<ValidationError> = vec![];
        let mut state = base_state("q", &urns, "a", &citations, &errors);
        state.effective_mode = Mode::Lenient;

        let row = build(&state, Settings::default());

        assert_eq!(row["mode"], json!("lenient"));
    }

    // Validation errors are emitted in order as `{kind, detail}` objects,
    // alongside the failed-validation flag and retry counter.
    #[test]
    fn errors_round_trip_with_kind_and_detail() {
        let urns: Vec<String> = vec![];
        let citations: Vec<u32> = vec![];
        let errors = vec![
            ValidationError {
                kind: ValidationErrorKind::Malformed,
                detail: "empty marker body".to_string(),
            },
            ValidationError {
                kind: ValidationErrorKind::OutOfRange,
                detail: "marker [^9] references source #9".to_string(),
            },
        ];
        let mut state = base_state("q", &urns, "a", &citations, &errors);
        state.validation_ok = false;
        state.retry_count = 1;

        let row = build(&state, Settings::default());

        assert_eq!(row["validation_ok"], json!(false));
        assert_eq!(row["retry_count"], json!(1));
        assert_eq!(
            row["errors"],
            json!([
                json!({"kind": "malformed", "detail": "empty marker body"}),
                json!({"kind": "out_of_range", "detail": "marker [^9] references source #9"}),
            ])
        );
    }

    // A cache-hit state with zeroed token counts and cost is recorded as given.
    #[test]
    fn cache_hit_recorded() {
        let urns: Vec<String> = vec![];
        let citations: Vec<u32> = vec![];
        let errors: Vec<ValidationError> = vec![];
        let mut state = base_state("q", &urns, "cached", &citations, &errors);
        state.cache_hit = true;
        state.prompt_tokens = 0;
        state.completion_tokens = 0;
        state.cost_usd = 0.0;

        let row = build(&state, Settings::default());

        assert_eq!(row["cache_hit"], json!(true));
        assert_eq!(row["cost_usd"], json!(0.0));
        assert_eq!(row["prompt_tokens"], json!(0));
    }

    // Empty tenant/user/role strings are passed through as empty strings,
    // not rejected or replaced.
    #[test]
    fn empty_identity_fields_allowed() {
        let urns: Vec<String> = vec![];
        let citations: Vec<u32> = vec![];
        let errors: Vec<ValidationError> = vec![];
        let mut state = base_state("q", &urns, "a", &citations, &errors);
        state.tenant = "";
        state.user = "";
        state.role = "";

        let row = build(&state, Settings::default());

        assert_eq!(row["tenant"], json!(""));
        assert_eq!(row["user"], json!(""));
        assert_eq!(row["role"], json!(""));
    }

    // Empty slices become empty JSON arrays (not null, not missing).
    #[test]
    fn empty_sources_serializes_as_empty_array() {
        let urns: Vec<String> = vec![];
        let citations: Vec<u32> = vec![];
        let errors: Vec<ValidationError> = vec![];
        let state = base_state("q", &urns, "a", &citations, &errors);

        let row = build(&state, Settings::default());

        assert_eq!(row["sources_urns"], json!([]));
        assert_eq!(row["citations"], json!([]));
        assert_eq!(row["errors"], json!([]));
    }

    // Source URNs keep the caller's order; they are deliberately given here
    // in non-sorted order to catch any sorting.
    #[test]
    fn sources_order_preserved() {
        let urns = vec![
            "urn:c".to_string(),
            "urn:a".to_string(),
            "urn:b".to_string(),
        ];
        let citations: Vec<u32> = vec![];
        let errors: Vec<ValidationError> = vec![];
        let state = base_state("q", &urns, "a", &citations, &errors);

        let row = build(&state, Settings::default());

        assert_eq!(row["sources_urns"], json!(["urn:c", "urn:a", "urn:b"]));
    }

    // Building twice from the same state yields equal rows.
    #[test]
    fn build_is_deterministic_across_calls() {
        let urns = vec!["urn:a".to_string()];
        let citations = vec![1u32];
        let errors: Vec<ValidationError> = vec![];
        let state = base_state("q", &urns, "a", &citations, &errors);

        let a = build(&state, Settings::default());
        let b = build(&state, Settings::default());
        assert_eq!(a, b);
    }
}