agent_sdk_eval/
request.rs1use serde::{Deserialize, Serialize};
4
5use agent_sdk_core::{AgentError, ProviderUsage};
6
7use crate::{
8 ComparisonDesign, EvaluationId, EvaluationMetricDelta, EvaluationScope, EvaluationSubject,
9 ExpectedOutcome,
10};
11
12#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
13pub struct EvaluationBudget {
16 pub max_provider_calls: u32,
18 pub max_prompt_chars: usize,
20 pub max_support_refs: usize,
22}
23
24impl Default for EvaluationBudget {
25 fn default() -> Self {
26 Self {
27 max_provider_calls: 1,
28 max_prompt_chars: 4_096,
29 max_support_refs: 8,
30 }
31 }
32}
33
34impl EvaluationBudget {
35 pub fn require_provider_call(&self) -> Result<(), AgentError> {
37 if self.max_provider_calls == 0 {
38 return Err(AgentError::contract_violation(
39 "evaluation budget allows zero provider calls",
40 ));
41 }
42 Ok(())
43 }
44}
45
46#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
47pub struct EvaluationUsage {
49 pub provider_calls: u32,
51 pub provider_usage: Option<ProviderUsage>,
53}
54
55#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
56pub struct EvaluationRequest {
58 pub evaluation_id: EvaluationId,
60 pub scope: EvaluationScope,
62 pub subjects: Vec<EvaluationSubject>,
64 pub expected_outcome: ExpectedOutcome,
66 pub comparison: ComparisonDesign,
68 pub metric_deltas: Vec<EvaluationMetricDelta>,
70 pub budget: EvaluationBudget,
72 pub redacted_summary: String,
74}
75
76impl EvaluationRequest {
77 pub fn new(
79 evaluation_id: EvaluationId,
80 scope: EvaluationScope,
81 expected_outcome: ExpectedOutcome,
82 ) -> Self {
83 Self {
84 evaluation_id,
85 scope,
86 subjects: Vec::new(),
87 redacted_summary: expected_outcome.redacted_summary.clone(),
88 expected_outcome,
89 comparison: ComparisonDesign::ObservedOnly,
90 metric_deltas: Vec::new(),
91 budget: EvaluationBudget::default(),
92 }
93 }
94
95 pub fn with_subject(mut self, subject: EvaluationSubject) -> Self {
97 self.subjects.push(subject);
98 self
99 }
100
101 pub fn with_comparison(mut self, comparison: ComparisonDesign) -> Self {
103 self.comparison = comparison;
104 self
105 }
106
107 pub fn with_metric_delta(mut self, metric_delta: EvaluationMetricDelta) -> Self {
109 self.metric_deltas.push(metric_delta);
110 self
111 }
112
113 pub fn with_metric_deltas(
115 mut self,
116 metric_deltas: impl IntoIterator<Item = EvaluationMetricDelta>,
117 ) -> Self {
118 self.metric_deltas.extend(metric_deltas);
119 self
120 }
121
122 pub fn with_budget(mut self, budget: EvaluationBudget) -> Self {
124 self.budget = budget;
125 self
126 }
127}