Skip to main content

synth_ai_core/api/
eval.rs

1//! Eval API client.
2//!
3//! This module provides methods for submitting and managing evaluation jobs.
4
5use std::time::{Duration, Instant};
6
7use serde_json::Value;
8
9use crate::http::HttpError;
10use crate::polling::{calculate_backoff, BackoffConfig};
11use crate::CoreError;
12
13use super::client::SynthClient;
14use super::types::{CancelRequest, EvalJobRequest, EvalJobStatus, EvalResult, JobSubmitResponse};
15
/// Base path of the evaluation-jobs REST resource.
///
/// Job-specific routes in this module are built by appending `/{job_id}` (and
/// `/{job_id}/cancel`) to this value, so it is written without a trailing slash.
const EVAL_ENDPOINT: &str = "/api/eval/jobs";
18
/// Client for the evaluation-jobs API.
///
/// A borrowed view over a [`SynthClient`]: it submits, inspects, polls,
/// cancels, and lists evaluation jobs through the parent client's HTTP
/// transport.
pub struct EvalClient<'a> {
    /// Parent client; every request in this module goes through its `http` field.
    client: &'a SynthClient,
}
25
26impl<'a> EvalClient<'a> {
27    /// Create a new Eval client.
28    pub(crate) fn new(client: &'a SynthClient) -> Self {
29        Self { client }
30    }
31
32    /// Submit an evaluation job.
33    ///
34    /// # Arguments
35    ///
36    /// * `request` - The eval job configuration
37    ///
38    /// # Returns
39    ///
40    /// The job ID on success.
41    ///
42    /// # Example
43    ///
44    /// ```ignore
45    /// let job_id = client.eval().submit(EvalJobRequest {
46    ///     task_app_url: "http://localhost:8000".into(),
47    ///     env_name: "default".into(),
48    ///     seeds: vec![1, 2, 3, 4, 5],
49    ///     policy: PolicyConfig::default(),
50    ///     ..Default::default()
51    /// }).await?;
52    /// ```
53    pub async fn submit(&self, request: EvalJobRequest) -> Result<String, CoreError> {
54        let body = serde_json::to_value(&request)
55            .map_err(|e| CoreError::Validation(format!("failed to serialize request: {}", e)))?;
56
57        let response: JobSubmitResponse = self
58            .client
59            .http
60            .post_json(EVAL_ENDPOINT, &body)
61            .await
62            .map_err(map_http_error)?;
63
64        Ok(response.job_id)
65    }
66
67    /// Submit a raw evaluation job from a JSON value.
68    pub async fn submit_raw(&self, request: Value) -> Result<String, CoreError> {
69        let response: JobSubmitResponse = self
70            .client
71            .http
72            .post_json(EVAL_ENDPOINT, &request)
73            .await
74            .map_err(map_http_error)?;
75
76        Ok(response.job_id)
77    }
78
79    /// Get the current status of an eval job.
80    ///
81    /// # Arguments
82    ///
83    /// * `job_id` - The job ID to check
84    ///
85    /// # Returns
86    ///
87    /// The current eval result including status, mean reward, etc.
88    pub async fn get_status(&self, job_id: &str) -> Result<EvalResult, CoreError> {
89        let path = format!("{}/{}", EVAL_ENDPOINT, job_id);
90
91        self.client
92            .http
93            .get(&path, None)
94            .await
95            .map_err(map_http_error)
96    }
97
98    /// Poll an eval job until it reaches a terminal state.
99    ///
100    /// # Arguments
101    ///
102    /// * `job_id` - The job ID to poll
103    /// * `timeout_secs` - Maximum time to wait (in seconds)
104    /// * `interval_secs` - Initial polling interval (in seconds)
105    ///
106    /// # Returns
107    ///
108    /// The final eval result.
109    ///
110    /// # Example
111    ///
112    /// ```ignore
113    /// let result = client.eval().poll_until_complete(&job_id, 1800.0, 10.0).await?;
114    /// println!("Mean reward: {:?}", result.mean_reward);
115    /// ```
116    pub async fn poll_until_complete(
117        &self,
118        job_id: &str,
119        timeout_secs: f64,
120        interval_secs: f64,
121    ) -> Result<EvalResult, CoreError> {
122        let start = Instant::now();
123        let timeout = Duration::from_secs_f64(timeout_secs);
124        let base_interval_ms = (interval_secs * 1000.0) as u64;
125
126        let config = BackoffConfig::new(base_interval_ms, 60000, 4);
127
128        let mut consecutive_errors = 0u32;
129        let max_errors = 10u32;
130
131        loop {
132            // Check timeout
133            if start.elapsed() > timeout {
134                return Err(CoreError::Timeout(format!(
135                    "eval job {} did not complete within {:.0} seconds",
136                    job_id, timeout_secs
137                )));
138            }
139
140            // Get status
141            match self.get_status(job_id).await {
142                Ok(result) => {
143                    consecutive_errors = 0;
144
145                    if result.status.is_terminal() {
146                        return Ok(result);
147                    }
148
149                    // Wait before next poll
150                    let delay = calculate_backoff(&config, 0);
151                    tokio::time::sleep(delay).await;
152                }
153                Err(e) => {
154                    consecutive_errors += 1;
155
156                    if consecutive_errors >= max_errors {
157                        return Err(CoreError::Internal(format!(
158                            "too many consecutive errors polling eval job {}: {}",
159                            job_id, e
160                        )));
161                    }
162
163                    // Backoff on errors
164                    let delay = calculate_backoff(&config, consecutive_errors);
165                    tokio::time::sleep(delay).await;
166                }
167            }
168        }
169    }
170
171    /// Cancel a running eval job.
172    ///
173    /// # Arguments
174    ///
175    /// * `job_id` - The job ID to cancel
176    pub async fn cancel(&self, job_id: &str) -> Result<(), CoreError> {
177        let path = format!("{}/{}/cancel", EVAL_ENDPOINT, job_id);
178        let body = serde_json::to_value(&CancelRequest { reason: None })
179            .unwrap_or(Value::Object(serde_json::Map::new()));
180
181        let _: Value = self
182            .client
183            .http
184            .post_json(&path, &body)
185            .await
186            .map_err(map_http_error)?;
187
188        Ok(())
189    }
190
191    /// List recent eval jobs.
192    ///
193    /// # Arguments
194    ///
195    /// * `limit` - Maximum number of jobs to return
196    /// * `status` - Optional status filter
197    pub async fn list(
198        &self,
199        limit: Option<i32>,
200        status: Option<EvalJobStatus>,
201    ) -> Result<Vec<EvalResult>, CoreError> {
202        let mut params = vec![];
203
204        let limit_str;
205        if let Some(l) = limit {
206            limit_str = l.to_string();
207            params.push(("limit", limit_str.as_str()));
208        }
209
210        let status_str;
211        if let Some(s) = status {
212            status_str = s.as_str().to_string();
213            params.push(("status", status_str.as_str()));
214        }
215
216        let params_ref: Option<&[(&str, &str)]> = if params.is_empty() {
217            None
218        } else {
219            Some(&params)
220        };
221
222        self.client
223            .http
224            .get(EVAL_ENDPOINT, params_ref)
225            .await
226            .map_err(map_http_error)
227    }
228}
229
230/// Map HTTP errors to CoreError.
231fn map_http_error(e: HttpError) -> CoreError {
232    match e {
233        HttpError::Response(detail) => {
234            if detail.status == 401 || detail.status == 403 {
235                CoreError::Authentication(format!("authentication failed: {}", detail))
236            } else if detail.status == 429 {
237                CoreError::UsageLimit(crate::UsageLimitInfo {
238                    limit_type: "rate_limit".to_string(),
239                    api: "eval".to_string(),
240                    current: 0.0,
241                    limit: 0.0,
242                    tier: "unknown".to_string(),
243                    retry_after_seconds: None,
244                    upgrade_url: "https://usesynth.ai/pricing".to_string(),
245                })
246            } else {
247                CoreError::HttpResponse(crate::HttpErrorInfo {
248                    status: detail.status,
249                    url: detail.url,
250                    message: detail.message,
251                    body_snippet: detail.body_snippet,
252                })
253            }
254        }
255        HttpError::Request(e) => CoreError::Http(e),
256        _ => CoreError::Internal(format!("{}", e)),
257    }
258}
259
#[cfg(test)]
mod tests {
    use super::EVAL_ENDPOINT;

    /// Pins the eval jobs route so an accidental edit to the constant is caught.
    #[test]
    fn test_eval_endpoint() {
        let expected = "/api/eval/jobs";
        assert_eq!(EVAL_ENDPOINT, expected);
    }
}