pub struct EvalCase {
pub name: String,
pub input: String,
pub expected_tools: Option<Vec<ExpectedToolCall>>,
pub output_contains: Vec<String>,
pub output_not_contains: Vec<String>,
pub reference_output: Option<String>,
pub max_cost_usd: Option<f64>,
pub max_latency_ms: Option<u64>,
pub max_tool_calls: Option<usize>,
}
Expand description
A single evaluation test case.
Fields
name: String — Human-readable name for the test case.
input: String — The task input to send to the agent.
expected_tools: Option<Vec<ExpectedToolCall>> — Expected tool calls in order (if Some). An empty vec means “expect no tools.”
output_contains: Vec<String> — Strings that should appear in the agent’s output.
output_not_contains: Vec<String> — Strings that must NOT appear in the agent’s output.
reference_output: Option<String> — Optional reference output for similarity scoring.
max_cost_usd: Option<f64> — Maximum acceptable cost in USD for this case.
max_latency_ms: Option<u64> — Maximum acceptable total LLM latency in milliseconds.
max_tool_calls: Option<usize> — Maximum acceptable number of tool calls.
Implementations
[Source] impl EvalCase
impl EvalCase
[Source] pub fn new(name: impl Into<String>, input: impl Into<String>) -> Self
pub fn new(name: impl Into<String>, input: impl Into<String>) -> Self
Create a new eval case with a name and input task.
[Source] pub fn expect_tool(self, name: impl Into<String>) -> Self
pub fn expect_tool(self, name: impl Into<String>) -> Self
Expect a specific tool to be called (order-independent).
[Source] pub fn expect_tool_at(self, name: impl Into<String>, position: usize) -> Self
pub fn expect_tool_at(self, name: impl Into<String>, position: usize) -> Self
Expect a tool at a specific position in the trajectory (0-indexed).
[Source] pub fn expect_no_tools(self) -> Self
pub fn expect_no_tools(self) -> Self
Expect no tool calls at all.
[Source] pub fn expect_output_contains(self, text: impl Into<String>) -> Self
pub fn expect_output_contains(self, text: impl Into<String>) -> Self
Expect the output to contain a string.
[Source] pub fn expect_output_not_contains(self, text: impl Into<String>) -> Self
pub fn expect_output_not_contains(self, text: impl Into<String>) -> Self
Expect the output to NOT contain a string.
[Source] pub fn reference_output(self, text: impl Into<String>) -> Self
pub fn reference_output(self, text: impl Into<String>) -> Self
Set a reference output for similarity scoring.
[Source] pub fn expect_max_cost_usd(self, max: f64) -> Self
pub fn expect_max_cost_usd(self, max: f64) -> Self
Set maximum acceptable cost in USD.
[Source] pub fn expect_max_latency_ms(self, max: u64) -> Self
pub fn expect_max_latency_ms(self, max: u64) -> Self
Set maximum acceptable total LLM latency in milliseconds.
[Source] pub fn expect_max_tool_calls(self, max: usize) -> Self
pub fn expect_max_tool_calls(self, max: usize) -> Self
Set maximum acceptable number of tool calls.