simple_agents_workflow/evals/
models.rs1use std::path::Path;
2
3use serde::{ser::SerializeStruct, Deserialize, Serialize};
4use serde_json::Value;
5use thiserror::Error;
6
7use crate::yaml_runner::{
8 YamlWorkflowCustomWorkerExecutor, YamlWorkflowExecutionFlags, YamlWorkflowExecutorBinding,
9 YamlWorkflowRunOptions, YamlWorkflowRunOutput,
10};
11
12#[derive(Debug, Error)]
13pub enum EvalError {
14 #[error("failed to read eval dataset '{path}': {source}")]
15 ReadDataset {
16 path: String,
17 source: std::io::Error,
18 },
19 #[error("failed to parse eval dataset '{path}' line {line}: {source}")]
20 ParseDatasetLine {
21 path: String,
22 line: usize,
23 source: serde_json::Error,
24 },
25 #[error("invalid eval dataset: {message}")]
26 InvalidDataset { message: String },
27}
28
29impl Serialize for EvalError {
30 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
31 where
32 S: serde::Serializer,
33 {
34 let mut state = serializer.serialize_struct("EvalError", 3)?;
35 let (code, path) = match self {
36 EvalError::ReadDataset { path, .. } => ("read_dataset_failed", Some(path)),
37 EvalError::ParseDatasetLine { path, .. } => ("parse_dataset_line_failed", Some(path)),
38 EvalError::InvalidDataset { .. } => ("invalid_dataset", None),
39 };
40 state.serialize_field("code", code)?;
41 state.serialize_field("message", &self.to_string())?;
42 if let Some(path) = path {
43 state.serialize_field("path", path)?;
44 }
45 state.end()
46 }
47}
48
49pub struct EvalSuiteRunRequest<'a> {
50 pub suite_id: Option<&'a str>,
51 pub workflow_path: &'a Path,
52 pub dataset_path: &'a Path,
53 pub executor: YamlWorkflowExecutorBinding<'a>,
54 pub custom_worker: Option<&'a dyn YamlWorkflowCustomWorkerExecutor>,
55 pub execution: YamlWorkflowExecutionFlags,
56 pub workflow_options: YamlWorkflowRunOptions,
57 pub max_concurrency: usize,
58}
59
60#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
61#[serde(deny_unknown_fields)]
62pub struct EvalDatasetRecord {
63 pub id: String,
64 pub input: Value,
65 pub expected_output: Value,
66 #[serde(default)]
67 pub rubric: Option<Value>,
68 #[serde(default)]
69 pub custom: Option<Value>,
70 #[serde(default)]
71 pub metadata: Option<Value>,
72}
73
74#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
75#[serde(rename_all = "snake_case")]
76pub enum EvalRunStatus {
77 Passed,
78 Failed,
79 Error,
80}
81
82#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
83pub struct EvalReport {
84 pub suite_id: String,
85 pub status: EvalRunStatus,
86 pub summary: EvalSummary,
87 pub cases: Vec<EvalCaseResult>,
88}
89
90#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
91pub struct EvalSummary {
92 pub total_cases: usize,
93 pub passed_cases: usize,
94 pub failed_cases: usize,
95 pub error_cases: usize,
96 pub pass_rate: f64,
97}
98
99#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
100pub struct EvalCaseResult {
101 pub case_id: String,
102 pub status: EvalRunStatus,
103 #[serde(skip_serializing_if = "Option::is_none")]
104 pub expected: Option<Value>,
105 #[serde(skip_serializing_if = "Option::is_none")]
106 pub actual: Option<Value>,
107 #[serde(default, skip_serializing_if = "Vec::is_empty")]
108 pub evaluations: Vec<EvalResult>,
109 #[serde(skip_serializing_if = "Option::is_none")]
110 pub workflow_output: Option<YamlWorkflowRunOutput>,
111 #[serde(skip_serializing_if = "Option::is_none")]
112 pub error: Option<EvalErrorInfo>,
113}
114
115#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
116pub struct EvalResult {
117 pub id: String,
118 pub status: EvalRunStatus,
119 pub passed: bool,
120 #[serde(skip_serializing_if = "Option::is_none")]
121 pub score: Option<f64>,
122 #[serde(skip_serializing_if = "Option::is_none")]
123 pub expected: Option<Value>,
124 #[serde(skip_serializing_if = "Option::is_none")]
125 pub actual: Option<Value>,
126 #[serde(skip_serializing_if = "Option::is_none")]
127 pub reason: Option<String>,
128 #[serde(skip_serializing_if = "Option::is_none")]
129 pub metadata: Option<Value>,
130}
131
132#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
133pub struct EvalErrorInfo {
134 pub code: String,
135 pub message: String,
136}