1use super::SqliteStore;
5use crate::error::MemoryError;
6use zeph_common::SessionId;
7#[allow(unused_imports)]
8use zeph_db::sql;
9
10#[derive(Debug, Clone)]
11pub struct ExperimentResultRow {
12 pub id: i64,
13 pub session_id: SessionId,
14 pub parameter: String,
15 pub value_json: String,
16 pub baseline_score: f64,
17 pub candidate_score: f64,
18 pub delta: f64,
19 pub latency_ms: i64,
20 pub tokens_used: i64,
21 pub accepted: bool,
22 pub source: String,
23 pub created_at: String,
24}
25
/// Borrowed input describing an experiment result that is about to be inserted.
///
/// It carries every column of [`ExperimentResultRow`] except `id` and
/// `created_at`, which this struct has no field for.
#[derive(Clone, Debug)]
pub struct NewExperimentResult<'a> {
    /// Session the result belongs to.
    pub session_id: &'a str,
    /// Parameter name the result refers to.
    pub parameter: &'a str,
    /// Value to store for the parameter; presumably JSON text, stored verbatim.
    pub value_json: &'a str,
    /// Score recorded for the baseline.
    pub baseline_score: f64,
    /// Score recorded for the candidate.
    pub candidate_score: f64,
    /// Score delta to store. It is stored as given, not recomputed.
    pub delta: f64,
    /// Latency to record (milliseconds, going by the field name).
    pub latency_ms: i64,
    /// Token usage to record.
    pub tokens_used: i64,
    /// Whether the result is marked as accepted.
    pub accepted: bool,
    /// Free-form label describing where the result came from.
    pub source: &'a str,
}
39
40#[derive(Debug, Clone)]
41pub struct SessionSummaryRow {
42 pub session_id: SessionId,
43 pub total: i64,
44 pub accepted_count: i64,
45 pub best_delta: f64,
46 pub total_tokens: i64,
47}
48
49fn validate_timestamp(s: &str) -> Result<(), MemoryError> {
51 let bytes = s.as_bytes();
52 if bytes.len() < 19 {
54 return Err(MemoryError::Other(format!(
55 "invalid timestamp format (too short): {s:?}"
56 )));
57 }
58 let sep = bytes[10];
59 if sep != b' ' && sep != b'T' {
60 return Err(MemoryError::Other(format!(
61 "invalid timestamp format (expected space or T at position 10): {s:?}"
62 )));
63 }
64 let digits_at = [0, 1, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18];
66 let dashes_at = [4, 7];
67 let colons_at = [13, 16];
68 for i in digits_at {
69 if !bytes[i].is_ascii_digit() {
70 return Err(MemoryError::Other(format!(
71 "invalid timestamp format (expected digit at {i}): {s:?}"
72 )));
73 }
74 }
75 for i in dashes_at {
76 if bytes[i] != b'-' {
77 return Err(MemoryError::Other(format!(
78 "invalid timestamp format (expected '-' at {i}): {s:?}"
79 )));
80 }
81 }
82 for i in colons_at {
83 if bytes[i] != b':' {
84 return Err(MemoryError::Other(format!(
85 "invalid timestamp format (expected ':' at {i}): {s:?}"
86 )));
87 }
88 }
89 Ok(())
90}
91
/// Raw tuple shape decoded from one `experiment_results` row.
///
/// Element order must match the column order of the `SELECT` lists in this
/// module; `row_from_tuple` maps the positions onto named fields.
type ResultTuple = (
    i64,    // 0: id
    String, // 1: session_id
    String, // 2: parameter
    String, // 3: value_json
    f64,    // 4: baseline_score
    f64,    // 5: candidate_score
    f64,    // 6: delta
    i64,    // 7: latency_ms
    i64,    // 8: tokens_used
    bool,   // 9: accepted
    String, // 10: source
    String, // 11: created_at
);
106
107fn row_from_tuple(t: ResultTuple) -> ExperimentResultRow {
108 ExperimentResultRow {
109 id: t.0,
110 session_id: SessionId::new(t.1),
111 parameter: t.2,
112 value_json: t.3,
113 baseline_score: t.4,
114 candidate_score: t.5,
115 delta: t.6,
116 latency_ms: t.7,
117 tokens_used: t.8,
118 accepted: t.9,
119 source: t.10,
120 created_at: t.11,
121 }
122}
123
124impl SqliteStore {
125 pub async fn insert_experiment_result(
131 &self,
132 result: &NewExperimentResult<'_>,
133 ) -> Result<i64, MemoryError> {
134 let row: (i64,) = zeph_db::query_as(sql!(
135 "INSERT INTO experiment_results \
136 (session_id, parameter, value_json, baseline_score, candidate_score, \
137 delta, latency_ms, tokens_used, accepted, source) \
138 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) RETURNING id"
139 ))
140 .bind(result.session_id)
141 .bind(result.parameter)
142 .bind(result.value_json)
143 .bind(result.baseline_score)
144 .bind(result.candidate_score)
145 .bind(result.delta)
146 .bind(result.latency_ms)
147 .bind(result.tokens_used)
148 .bind(result.accepted)
149 .bind(result.source)
150 .fetch_one(&self.pool)
151 .await?;
152 Ok(row.0)
153 }
154
155 pub async fn list_experiment_results(
161 &self,
162 session_id: Option<&str>,
163 limit: u32,
164 ) -> Result<Vec<ExperimentResultRow>, MemoryError> {
165 let rows: Vec<ResultTuple> = if let Some(sid) = session_id {
166 zeph_db::query_as(sql!(
167 "SELECT id, session_id, parameter, value_json, baseline_score, candidate_score, \
168 delta, latency_ms, tokens_used, accepted, source, created_at \
169 FROM experiment_results WHERE session_id = ? ORDER BY id DESC LIMIT ?"
170 ))
171 .bind(sid)
172 .bind(limit)
173 .fetch_all(&self.pool)
174 .await?
175 } else {
176 zeph_db::query_as(sql!(
177 "SELECT id, session_id, parameter, value_json, baseline_score, candidate_score, \
178 delta, latency_ms, tokens_used, accepted, source, created_at \
179 FROM experiment_results ORDER BY id DESC LIMIT ?"
180 ))
181 .bind(limit)
182 .fetch_all(&self.pool)
183 .await?
184 };
185 Ok(rows.into_iter().map(row_from_tuple).collect())
186 }
187
188 pub async fn best_experiment_result(
194 &self,
195 parameter: Option<&str>,
196 ) -> Result<Option<ExperimentResultRow>, MemoryError> {
197 let row: Option<ResultTuple> = if let Some(param) = parameter {
198 zeph_db::query_as(sql!(
199 "SELECT id, session_id, parameter, value_json, baseline_score, candidate_score, \
200 delta, latency_ms, tokens_used, accepted, source, created_at \
201 FROM experiment_results \
202 WHERE accepted = 1 AND parameter = ? ORDER BY delta DESC LIMIT 1"
203 ))
204 .bind(param)
205 .fetch_optional(&self.pool)
206 .await?
207 } else {
208 zeph_db::query_as(sql!(
209 "SELECT id, session_id, parameter, value_json, baseline_score, candidate_score, \
210 delta, latency_ms, tokens_used, accepted, source, created_at \
211 FROM experiment_results \
212 WHERE accepted = 1 ORDER BY delta DESC LIMIT 1"
213 ))
214 .fetch_optional(&self.pool)
215 .await?
216 };
217 Ok(row.map(row_from_tuple))
218 }
219
220 pub async fn experiment_results_since(
227 &self,
228 since: &str,
229 ) -> Result<Vec<ExperimentResultRow>, MemoryError> {
230 validate_timestamp(since)?;
231 let rows: Vec<ResultTuple> = zeph_db::query_as(sql!(
232 "SELECT id, session_id, parameter, value_json, baseline_score, candidate_score, \
233 delta, latency_ms, tokens_used, accepted, source, created_at \
234 FROM experiment_results WHERE created_at >= ? ORDER BY id DESC LIMIT 10000"
235 ))
236 .bind(since)
237 .fetch_all(&self.pool)
238 .await?;
239 Ok(rows.into_iter().map(row_from_tuple).collect())
240 }
241
242 pub async fn experiment_session_summary(
248 &self,
249 session_id: &str,
250 ) -> Result<Option<SessionSummaryRow>, MemoryError> {
251 let row: Option<(String, i64, i64, Option<f64>, i64)> = zeph_db::query_as(sql!(
252 "SELECT session_id, COUNT(*) as total, \
253 SUM(CASE WHEN accepted = 1 THEN 1 ELSE 0 END) as accepted_count, \
254 MAX(CASE WHEN accepted = 1 THEN delta ELSE NULL END) as best_delta, \
255 SUM(tokens_used) as total_tokens \
256 FROM experiment_results WHERE session_id = ? GROUP BY session_id"
257 ))
258 .bind(session_id)
259 .fetch_optional(&self.pool)
260 .await?;
261 Ok(row.map(
262 |(sid, total, accepted_count, best_delta, total_tokens)| SessionSummaryRow {
263 session_id: SessionId::new(sid),
264 total,
265 accepted_count,
266 best_delta: best_delta.unwrap_or(0.0),
267 total_tokens,
268 },
269 ))
270 }
271}
272
#[cfg(test)]
mod tests {
    use super::*;

    /// Opens a fresh in-memory store for a single test.
    async fn test_store() -> SqliteStore {
        SqliteStore::new(":memory:").await.unwrap()
    }

    /// Builds a fixture result with fixed scores, latency, tokens and source.
    fn make_result<'a>(
        session_id: &'a str,
        parameter: &'a str,
        accepted: bool,
        delta: f64,
    ) -> NewExperimentResult<'a> {
        NewExperimentResult {
            session_id,
            parameter,
            value_json: r#"{"type":"Float","value":0.7}"#,
            baseline_score: 7.0,
            candidate_score: 7.0 + delta,
            delta,
            latency_ms: 500,
            tokens_used: 100,
            accepted,
            source: "manual",
        }
    }

    /// Inserts a fixture row and returns its id, panicking if the insert fails.
    async fn insert(
        store: &SqliteStore,
        session_id: &str,
        parameter: &str,
        accepted: bool,
        delta: f64,
    ) -> i64 {
        let fixture = make_result(session_id, parameter, accepted, delta);
        store.insert_experiment_result(&fixture).await.unwrap()
    }

    #[tokio::test]
    async fn insert_and_list_results() {
        let store = test_store().await;
        let id = insert(&store, "session-1", "temperature", true, 1.0).await;
        assert!(id > 0);

        let rows = store
            .list_experiment_results(Some("session-1"), 10)
            .await
            .unwrap();
        assert_eq!(rows.len(), 1);

        let row = &rows[0];
        assert_eq!(row.session_id, "session-1");
        assert_eq!(row.parameter, "temperature");
        assert!(row.accepted);
        assert!((row.delta - 1.0).abs() < f64::EPSILON);
    }

    #[tokio::test]
    async fn list_results_no_filter_returns_all() {
        let store = test_store().await;
        insert(&store, "s1", "temperature", true, 1.0).await;
        insert(&store, "s2", "top_p", false, -0.2).await;

        let rows = store.list_experiment_results(None, 10).await.unwrap();
        assert_eq!(rows.len(), 2);
        // Rows come back by descending id, so the later insert is first.
        assert_eq!(rows[0].session_id, "s2");
    }

    #[tokio::test]
    async fn best_result_returns_accepted_highest_delta() {
        let store = test_store().await;
        // The rejected row has the largest delta and must be ignored.
        insert(&store, "s1", "temperature", false, 2.0).await;
        insert(&store, "s1", "temperature", true, 0.5).await;
        insert(&store, "s1", "temperature", true, 1.5).await;

        let best = store.best_experiment_result(None).await.unwrap().unwrap();
        assert!(best.accepted);
        assert!((best.delta - 1.5).abs() < f64::EPSILON);
    }

    #[tokio::test]
    async fn best_result_filtered_by_parameter() {
        let store = test_store().await;
        insert(&store, "s1", "temperature", true, 2.0).await;
        insert(&store, "s1", "top_p", true, 1.0).await;

        let best = store
            .best_experiment_result(Some("top_p"))
            .await
            .unwrap()
            .unwrap();
        assert_eq!(best.parameter, "top_p");
    }

    #[tokio::test]
    async fn best_result_no_accepted_returns_none() {
        let store = test_store().await;
        insert(&store, "s1", "temperature", false, 2.0).await;

        let best = store.best_experiment_result(None).await.unwrap();
        assert!(best.is_none());
    }

    #[tokio::test]
    async fn session_summary_aggregation() {
        let store = test_store().await;
        insert(&store, "sess", "temperature", true, 1.0).await;
        insert(&store, "sess", "top_p", false, -0.2).await;
        insert(&store, "sess", "top_k", true, 0.8).await;

        let summary = store
            .experiment_session_summary("sess")
            .await
            .unwrap()
            .unwrap();
        assert_eq!(summary.session_id, "sess");
        assert_eq!(summary.total, 3);
        assert_eq!(summary.accepted_count, 2);
        assert!((summary.best_delta - 1.0).abs() < f64::EPSILON);
        assert_eq!(summary.total_tokens, 300);
    }

    #[tokio::test]
    async fn session_summary_unknown_session_returns_none() {
        let store = test_store().await;
        let summary = store
            .experiment_session_summary("nonexistent")
            .await
            .unwrap();
        assert!(summary.is_none());
    }

    #[tokio::test]
    async fn results_since_time_filtering() {
        let store = test_store().await;
        insert(&store, "s1", "temperature", true, 1.0).await;

        // A far-future lower bound excludes the freshly inserted row.
        let future_rows = store
            .experiment_results_since("2099-01-01 00:00:00")
            .await
            .unwrap();
        assert!(future_rows.is_empty());

        // A far-past lower bound includes it.
        let past_rows = store
            .experiment_results_since("2000-01-01 00:00:00")
            .await
            .unwrap();
        assert_eq!(past_rows.len(), 1);
    }

    #[tokio::test]
    async fn results_since_rejects_invalid_timestamp() {
        let store = test_store().await;
        let bad = [
            "",
            "not-a-date",
            "0000-00-00",
            "2024-01-01",
            "2024/01/01 00:00:00",
        ];
        for ts in bad {
            let err = store.experiment_results_since(ts).await;
            assert!(err.is_err(), "expected error for timestamp: {ts:?}");
        }

        // The `T`-separated form passes validation; the store is empty, so the
        // query itself returns no rows.
        let store2 = test_store().await;
        let rows = store2
            .experiment_results_since("2000-01-01T00:00:00")
            .await
            .unwrap();
        assert!(rows.is_empty());
    }

    #[tokio::test]
    async fn list_results_respects_limit() {
        let store = test_store().await;
        for i in 0..5 {
            insert(&store, "s1", "temperature", i % 2 == 0, f64::from(i)).await;
        }

        let rows = store.list_experiment_results(None, 3).await.unwrap();
        assert_eq!(rows.len(), 3);
    }
}