Skip to main content

zeph_memory/store/
experiments.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use super::SqliteStore;
5use crate::error::MemoryError;
6use zeph_common::SessionId;
7#[allow(unused_imports)]
8use zeph_db::sql;
9
/// One row read back from the `experiment_results` table.
///
/// Values are produced by `row_from_tuple` from the twelve columns that the query methods
/// on `SqliteStore` in this module select.
#[derive(Debug, Clone)]
pub struct ExperimentResultRow {
    /// Row ID; the same value `insert_experiment_result` returns for a new row.
    pub id: i64,
    /// Session the result was recorded under.
    pub session_id: SessionId,
    /// Parameter name the result is recorded for (the tests use values such as
    /// `temperature` and `top_p`).
    pub parameter: String,
    /// Value stored as a JSON string — presumably the candidate value for `parameter`;
    /// the tests store `{"type":"Float","value":0.7}`.
    pub value_json: String,
    /// Baseline score column.
    pub baseline_score: f64,
    /// Candidate score column; the tests populate it as `baseline_score + delta`.
    pub candidate_score: f64,
    /// Score delta; `best_experiment_result` returns the accepted row with the highest value.
    pub delta: f64,
    /// Latency column — presumably milliseconds, going by the name.
    pub latency_ms: i64,
    /// Token count; summed per session by `experiment_session_summary`.
    pub tokens_used: i64,
    /// Whether the result was accepted; only accepted rows are considered by
    /// `best_experiment_result`.
    pub accepted: bool,
    /// Origin label for the result (the tests use `manual`).
    pub source: String,
    /// Creation timestamp as text. It is not part of the insert in this module, so the
    /// database supplies it; `experiment_results_since` compares it against its argument.
    pub created_at: String,
}
25
/// Borrowed payload for `SqliteStore::insert_experiment_result`.
///
/// Carries the ten columns the insert writes; `id` and `created_at` are not included and
/// are left to the database.
#[derive(Debug, Clone)]
pub struct NewExperimentResult<'a> {
    /// Session the result belongs to.
    pub session_id: &'a str,
    /// Parameter name the result is recorded for.
    pub parameter: &'a str,
    /// Value as a JSON string (the tests pass `{"type":"Float","value":0.7}`).
    pub value_json: &'a str,
    /// Baseline score column.
    pub baseline_score: f64,
    /// Candidate score column.
    pub candidate_score: f64,
    /// Score delta. It is stored as given; this module does not recompute it from the scores.
    pub delta: f64,
    /// Latency column — presumably milliseconds, going by the name.
    pub latency_ms: i64,
    /// Token count for this result.
    pub tokens_used: i64,
    /// Whether the result was accepted.
    pub accepted: bool,
    /// Origin label for the result (the tests use `manual`).
    pub source: &'a str,
}
39
/// Per-session aggregate returned by `SqliteStore::experiment_session_summary`.
#[derive(Debug, Clone)]
pub struct SessionSummaryRow {
    /// Session the aggregate was computed for.
    pub session_id: SessionId,
    /// Number of result rows in the session (`COUNT(*)`).
    pub total: i64,
    /// Number of rows with `accepted = 1`.
    pub accepted_count: i64,
    /// Highest `delta` among accepted rows; `0.0` when the session has no accepted row.
    pub best_delta: f64,
    /// Sum of `tokens_used` over all rows in the session.
    pub total_tokens: i64,
}
48
49/// Validate that `s` looks like `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DDTHH:MM:SS`.
50fn validate_timestamp(s: &str) -> Result<(), MemoryError> {
51    let bytes = s.as_bytes();
52    // Minimum length: "2000-01-01 00:00:00" = 19 chars
53    if bytes.len() < 19 {
54        return Err(MemoryError::Other(format!(
55            "invalid timestamp format (too short): {s:?}"
56        )));
57    }
58    let sep = bytes[10];
59    if sep != b' ' && sep != b'T' {
60        return Err(MemoryError::Other(format!(
61            "invalid timestamp format (expected space or T at position 10): {s:?}"
62        )));
63    }
64    // Check digit positions: YYYY-MM-DD HH:MM:SS
65    let digits_at = [0, 1, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18];
66    let dashes_at = [4, 7];
67    let colons_at = [13, 16];
68    for i in digits_at {
69        if !bytes[i].is_ascii_digit() {
70            return Err(MemoryError::Other(format!(
71                "invalid timestamp format (expected digit at {i}): {s:?}"
72            )));
73        }
74    }
75    for i in dashes_at {
76        if bytes[i] != b'-' {
77            return Err(MemoryError::Other(format!(
78                "invalid timestamp format (expected '-' at {i}): {s:?}"
79            )));
80        }
81    }
82    for i in colons_at {
83        if bytes[i] != b':' {
84            return Err(MemoryError::Other(format!(
85                "invalid timestamp format (expected ':' at {i}): {s:?}"
86            )));
87        }
88    }
89    Ok(())
90}
91
/// Raw column tuple decoded from an `experiment_results` row.
///
/// Positions follow the SELECT lists in this module: `id`, `session_id`, `parameter`,
/// `value_json`, `baseline_score`, `candidate_score`, `delta`, `latency_ms`, `tokens_used`,
/// `accepted`, `source`, `created_at`.
type ResultTuple = (
    i64,
    String,
    String,
    String,
    f64,
    f64,
    f64,
    i64,
    i64,
    bool,
    String,
    String,
);
106
107fn row_from_tuple(t: ResultTuple) -> ExperimentResultRow {
108    ExperimentResultRow {
109        id: t.0,
110        session_id: SessionId::new(t.1),
111        parameter: t.2,
112        value_json: t.3,
113        baseline_score: t.4,
114        candidate_score: t.5,
115        delta: t.6,
116        latency_ms: t.7,
117        tokens_used: t.8,
118        accepted: t.9,
119        source: t.10,
120        created_at: t.11,
121    }
122}
123
124impl SqliteStore {
125    /// Insert an experiment result and return the new row ID.
126    ///
127    /// # Errors
128    ///
129    /// Returns an error if the insert fails.
130    pub async fn insert_experiment_result(
131        &self,
132        result: &NewExperimentResult<'_>,
133    ) -> Result<i64, MemoryError> {
134        let row: (i64,) = zeph_db::query_as(sql!(
135            "INSERT INTO experiment_results \
136             (session_id, parameter, value_json, baseline_score, candidate_score, \
137              delta, latency_ms, tokens_used, accepted, source) \
138             VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) RETURNING id"
139        ))
140        .bind(result.session_id)
141        .bind(result.parameter)
142        .bind(result.value_json)
143        .bind(result.baseline_score)
144        .bind(result.candidate_score)
145        .bind(result.delta)
146        .bind(result.latency_ms)
147        .bind(result.tokens_used)
148        .bind(result.accepted)
149        .bind(result.source)
150        .fetch_one(&self.pool)
151        .await?;
152        Ok(row.0)
153    }
154
155    /// List experiment results, optionally filtered by `session_id`, newest first.
156    ///
157    /// # Errors
158    ///
159    /// Returns an error if the query fails.
160    pub async fn list_experiment_results(
161        &self,
162        session_id: Option<&str>,
163        limit: u32,
164    ) -> Result<Vec<ExperimentResultRow>, MemoryError> {
165        let rows: Vec<ResultTuple> = if let Some(sid) = session_id {
166            zeph_db::query_as(sql!(
167                "SELECT id, session_id, parameter, value_json, baseline_score, candidate_score, \
168                 delta, latency_ms, tokens_used, accepted, source, created_at \
169                 FROM experiment_results WHERE session_id = ? ORDER BY id DESC LIMIT ?"
170            ))
171            .bind(sid)
172            .bind(limit)
173            .fetch_all(&self.pool)
174            .await?
175        } else {
176            zeph_db::query_as(sql!(
177                "SELECT id, session_id, parameter, value_json, baseline_score, candidate_score, \
178                 delta, latency_ms, tokens_used, accepted, source, created_at \
179                 FROM experiment_results ORDER BY id DESC LIMIT ?"
180            ))
181            .bind(limit)
182            .fetch_all(&self.pool)
183            .await?
184        };
185        Ok(rows.into_iter().map(row_from_tuple).collect())
186    }
187
188    /// Get the best accepted result (highest delta), optionally filtered by parameter.
189    ///
190    /// # Errors
191    ///
192    /// Returns an error if the query fails.
193    pub async fn best_experiment_result(
194        &self,
195        parameter: Option<&str>,
196    ) -> Result<Option<ExperimentResultRow>, MemoryError> {
197        let row: Option<ResultTuple> = if let Some(param) = parameter {
198            zeph_db::query_as(sql!(
199                "SELECT id, session_id, parameter, value_json, baseline_score, candidate_score, \
200                 delta, latency_ms, tokens_used, accepted, source, created_at \
201                 FROM experiment_results \
202                 WHERE accepted = 1 AND parameter = ? ORDER BY delta DESC LIMIT 1"
203            ))
204            .bind(param)
205            .fetch_optional(&self.pool)
206            .await?
207        } else {
208            zeph_db::query_as(sql!(
209                "SELECT id, session_id, parameter, value_json, baseline_score, candidate_score, \
210                 delta, latency_ms, tokens_used, accepted, source, created_at \
211                 FROM experiment_results \
212                 WHERE accepted = 1 ORDER BY delta DESC LIMIT 1"
213            ))
214            .fetch_optional(&self.pool)
215            .await?
216        };
217        Ok(row.map(row_from_tuple))
218    }
219
220    /// Get all results since a given ISO-8601 timestamp (`YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DDTHH:MM:SS`).
221    ///
222    /// # Errors
223    ///
224    /// Returns `MemoryError::Other` if `since` does not match the expected timestamp format.
225    /// Returns an error if the query fails.
226    pub async fn experiment_results_since(
227        &self,
228        since: &str,
229    ) -> Result<Vec<ExperimentResultRow>, MemoryError> {
230        validate_timestamp(since)?;
231        let rows: Vec<ResultTuple> = zeph_db::query_as(sql!(
232            "SELECT id, session_id, parameter, value_json, baseline_score, candidate_score, \
233             delta, latency_ms, tokens_used, accepted, source, created_at \
234             FROM experiment_results WHERE created_at >= ? ORDER BY id DESC LIMIT 10000"
235        ))
236        .bind(since)
237        .fetch_all(&self.pool)
238        .await?;
239        Ok(rows.into_iter().map(row_from_tuple).collect())
240    }
241
242    /// Get a summary for a specific session.
243    ///
244    /// # Errors
245    ///
246    /// Returns an error if the query fails.
247    pub async fn experiment_session_summary(
248        &self,
249        session_id: &str,
250    ) -> Result<Option<SessionSummaryRow>, MemoryError> {
251        let row: Option<(String, i64, i64, Option<f64>, i64)> = zeph_db::query_as(sql!(
252            "SELECT session_id, COUNT(*) as total, \
253             SUM(CASE WHEN accepted = 1 THEN 1 ELSE 0 END) as accepted_count, \
254             MAX(CASE WHEN accepted = 1 THEN delta ELSE NULL END) as best_delta, \
255             SUM(tokens_used) as total_tokens \
256             FROM experiment_results WHERE session_id = ? GROUP BY session_id"
257        ))
258        .bind(session_id)
259        .fetch_optional(&self.pool)
260        .await?;
261        Ok(row.map(
262            |(sid, total, accepted_count, best_delta, total_tokens)| SessionSummaryRow {
263                session_id: SessionId::new(sid),
264                total,
265                accepted_count,
266                best_delta: best_delta.unwrap_or(0.0),
267                total_tokens,
268            },
269        ))
270    }
271}
272
#[cfg(test)]
mod tests {
    use super::*;

    /// Open a fresh in-memory store for one test.
    async fn test_store() -> SqliteStore {
        let opened = SqliteStore::new(":memory:").await;
        opened.unwrap()
    }

    /// Build an insert payload; everything except the four arguments is fixed.
    fn make_result<'a>(
        session_id: &'a str,
        parameter: &'a str,
        accepted: bool,
        delta: f64,
    ) -> NewExperimentResult<'a> {
        let baseline_score = 7.0;
        NewExperimentResult {
            session_id,
            parameter,
            value_json: r#"{"type":"Float","value":0.7}"#,
            baseline_score,
            candidate_score: baseline_score + delta,
            delta,
            latency_ms: 500,
            tokens_used: 100,
            accepted,
            source: "manual",
        }
    }

    /// Insert one result built by `make_result` and return its row ID.
    async fn seed(
        store: &SqliteStore,
        session_id: &str,
        parameter: &str,
        accepted: bool,
        delta: f64,
    ) -> i64 {
        let payload = make_result(session_id, parameter, accepted, delta);
        store.insert_experiment_result(&payload).await.unwrap()
    }

    #[tokio::test]
    async fn insert_and_list_results() {
        let store = test_store().await;
        let new_id = seed(&store, "session-1", "temperature", true, 1.0).await;
        assert!(new_id > 0);

        let listed = store
            .list_experiment_results(Some("session-1"), 10)
            .await
            .unwrap();
        assert_eq!(listed.len(), 1);
        let first = &listed[0];
        assert_eq!(first.session_id, "session-1");
        assert_eq!(first.parameter, "temperature");
        assert!(first.accepted);
        assert!((first.delta - 1.0).abs() < f64::EPSILON);
    }

    #[tokio::test]
    async fn list_results_no_filter_returns_all() {
        let store = test_store().await;
        seed(&store, "s1", "temperature", true, 1.0).await;
        seed(&store, "s2", "top_p", false, -0.2).await;

        let listed = store.list_experiment_results(None, 10).await.unwrap();
        assert_eq!(listed.len(), 2);
        // The most recently inserted row comes back first.
        assert_eq!(listed[0].session_id, "s2");
    }

    #[tokio::test]
    async fn best_result_returns_accepted_highest_delta() {
        let store = test_store().await;
        // The rejected row has the largest delta and must be ignored.
        seed(&store, "s1", "temperature", false, 2.0).await;
        seed(&store, "s1", "temperature", true, 0.5).await;
        seed(&store, "s1", "temperature", true, 1.5).await;

        let winner = store.best_experiment_result(None).await.unwrap().unwrap();
        assert!(winner.accepted);
        assert!((winner.delta - 1.5).abs() < f64::EPSILON);
    }

    #[tokio::test]
    async fn best_result_filtered_by_parameter() {
        let store = test_store().await;
        seed(&store, "s1", "temperature", true, 2.0).await;
        seed(&store, "s1", "top_p", true, 1.0).await;

        let winner = store
            .best_experiment_result(Some("top_p"))
            .await
            .unwrap()
            .unwrap();
        assert_eq!(winner.parameter, "top_p");
    }

    #[tokio::test]
    async fn best_result_no_accepted_returns_none() {
        let store = test_store().await;
        seed(&store, "s1", "temperature", false, 2.0).await;

        let winner = store.best_experiment_result(None).await.unwrap();
        assert!(winner.is_none());
    }

    #[tokio::test]
    async fn session_summary_aggregation() {
        let store = test_store().await;
        seed(&store, "sess", "temperature", true, 1.0).await;
        seed(&store, "sess", "top_p", false, -0.2).await;
        seed(&store, "sess", "top_k", true, 0.8).await;

        let aggregate = store
            .experiment_session_summary("sess")
            .await
            .unwrap()
            .unwrap();
        assert_eq!(aggregate.session_id, "sess");
        assert_eq!(aggregate.total, 3);
        assert_eq!(aggregate.accepted_count, 2);
        assert!((aggregate.best_delta - 1.0).abs() < f64::EPSILON);
        assert_eq!(aggregate.total_tokens, 300);
    }

    #[tokio::test]
    async fn session_summary_unknown_session_returns_none() {
        let store = test_store().await;
        let aggregate = store
            .experiment_session_summary("nonexistent")
            .await
            .unwrap();
        assert!(aggregate.is_none());
    }

    #[tokio::test]
    async fn results_since_time_filtering() {
        let store = test_store().await;
        seed(&store, "s1", "temperature", true, 1.0).await;

        // A cutoff in the future matches nothing.
        let after_future = store
            .experiment_results_since("2099-01-01 00:00:00")
            .await
            .unwrap();
        assert!(after_future.is_empty());

        // A cutoff in the past matches the row inserted above.
        let after_past = store
            .experiment_results_since("2000-01-01 00:00:00")
            .await
            .unwrap();
        assert_eq!(after_past.len(), 1);
    }

    #[tokio::test]
    async fn results_since_rejects_invalid_timestamp() {
        let store = test_store().await;
        for ts in [
            "",
            "not-a-date",
            "0000-00-00",
            "2024-01-01",
            "2024/01/01 00:00:00",
        ] {
            let outcome = store.experiment_results_since(ts).await;
            assert!(outcome.is_err(), "expected error for timestamp: {ts:?}");
        }

        // The `T`-separated ISO-8601 form is accepted.
        let store2 = test_store().await;
        let listed = store2
            .experiment_results_since("2000-01-01T00:00:00")
            .await
            .unwrap();
        assert!(listed.is_empty());
    }

    #[tokio::test]
    async fn list_results_respects_limit() {
        let store = test_store().await;
        for i in 0..5 {
            seed(&store, "s1", "temperature", i % 2 == 0, f64::from(i)).await;
        }

        let listed = store.list_experiment_results(None, 3).await.unwrap();
        assert_eq!(listed.len(), 3);
    }
}