Skip to main content

agent_sdk_eval/
request.rs

//! Evaluation request, budget, and usage records.
2
3use serde::{Deserialize, Serialize};
4
5use agent_sdk_core::{AgentError, ProviderUsage};
6
7use crate::{
8    ComparisonDesign, EvaluationId, EvaluationMetricDelta, EvaluationScope, EvaluationSubject,
9    ExpectedOutcome,
10};
11
12#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
13/// Budget for evaluator work. Provider-backed evaluators should reject requests
14/// that cannot fit this budget rather than silently making extra calls.
15pub struct EvaluationBudget {
16    /// Maximum provider calls an evaluator may make for this evaluation.
17    pub max_provider_calls: u32,
18    /// Maximum prompt characters sent to a provider-backed evaluator.
19    pub max_prompt_chars: usize,
20    /// Maximum cited support refs accepted from evaluator output.
21    pub max_support_refs: usize,
22}
23
24impl Default for EvaluationBudget {
25    fn default() -> Self {
26        Self {
27            max_provider_calls: 1,
28            max_prompt_chars: 4_096,
29            max_support_refs: 8,
30        }
31    }
32}
33
34impl EvaluationBudget {
35    /// Ensures a provider-backed evaluator may spend one provider call.
36    pub fn require_provider_call(&self) -> Result<(), AgentError> {
37        if self.max_provider_calls == 0 {
38            return Err(AgentError::contract_violation(
39                "evaluation budget allows zero provider calls",
40            ));
41        }
42        Ok(())
43    }
44}
45
46#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
47/// Usage captured by an evaluator run.
48pub struct EvaluationUsage {
49    /// Number of provider calls made by this evaluator.
50    pub provider_calls: u32,
51    /// Provider usage accounting when the adapter reports it.
52    pub provider_usage: Option<ProviderUsage>,
53}
54
55#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
56/// Request passed to an evaluator.
57pub struct EvaluationRequest {
58    /// Stable evaluation id for lineage and test output.
59    pub evaluation_id: EvaluationId,
60    /// Durable scope being evaluated.
61    pub scope: EvaluationScope,
62    /// Subjects being evaluated or compared.
63    pub subjects: Vec<EvaluationSubject>,
64    /// Expected outcome supplied by a test, host, or reviewer.
65    pub expected_outcome: ExpectedOutcome,
66    /// Comparison design for the evaluation.
67    pub comparison: ComparisonDesign,
68    /// Deterministic metric deltas supplied by local evaluators or tests.
69    pub metric_deltas: Vec<EvaluationMetricDelta>,
70    /// Budget for evaluator work.
71    pub budget: EvaluationBudget,
72    /// Bounded request summary safe for logs and prompts.
73    pub redacted_summary: String,
74}
75
76impl EvaluationRequest {
77    /// Creates an evaluation request with observed-only comparison defaults.
78    pub fn new(
79        evaluation_id: EvaluationId,
80        scope: EvaluationScope,
81        expected_outcome: ExpectedOutcome,
82    ) -> Self {
83        Self {
84            evaluation_id,
85            scope,
86            subjects: Vec::new(),
87            redacted_summary: expected_outcome.redacted_summary.clone(),
88            expected_outcome,
89            comparison: ComparisonDesign::ObservedOnly,
90            metric_deltas: Vec::new(),
91            budget: EvaluationBudget::default(),
92        }
93    }
94
95    /// Returns this request with one subject appended.
96    pub fn with_subject(mut self, subject: EvaluationSubject) -> Self {
97        self.subjects.push(subject);
98        self
99    }
100
101    /// Returns this request with its comparison design replaced.
102    pub fn with_comparison(mut self, comparison: ComparisonDesign) -> Self {
103        self.comparison = comparison;
104        self
105    }
106
107    /// Returns this request with one deterministic metric delta appended.
108    pub fn with_metric_delta(mut self, metric_delta: EvaluationMetricDelta) -> Self {
109        self.metric_deltas.push(metric_delta);
110        self
111    }
112
113    /// Returns this request with deterministic metric deltas appended.
114    pub fn with_metric_deltas(
115        mut self,
116        metric_deltas: impl IntoIterator<Item = EvaluationMetricDelta>,
117    ) -> Self {
118        self.metric_deltas.extend(metric_deltas);
119        self
120    }
121
122    /// Returns this request with its budget replaced.
123    pub fn with_budget(mut self, budget: EvaluationBudget) -> Self {
124        self.budget = budget;
125        self
126    }
127}