Skip to main content

agent_sdk_eval/
evidence.rs

1//! Evidence bundles derived from core traces.
2
3use serde::{Deserialize, Serialize};
4
5use agent_sdk_core::{
6    AgentError, EntityKind, EntityRef, PolicyRef, PrivacyClass, RetentionClass, RunTrace,
7    SessionTimeline, TurnTrace,
8};
9
10use crate::EvaluationScope;
11
12#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
13#[serde(rename_all = "snake_case")]
14/// Role one evidence item plays in an evaluation bundle.
15pub enum EvidenceRole {
16    /// User input or source message.
17    Input,
18    /// Context projection or admitted context item.
19    Context,
20    /// Tool call evidence.
21    Tool,
22    /// Model attempt evidence.
23    Model,
24    /// Terminal output or result marker evidence.
25    Output,
26    /// Effect intent/result evidence.
27    Effect,
28    /// Policy or runtime constraint evidence.
29    Policy,
30    /// Expected outcome evidence supplied by a test or reviewer.
31    ExpectedOutcome,
32    /// Baseline or comparison evidence.
33    Baseline,
34    /// Other product-neutral evidence.
35    Other,
36}
37
38#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
39/// One model-visible or evaluator-visible evidence ref.
40pub struct EvidenceItem {
41    /// Entity ref used for cited support validation.
42    pub evidence_ref: EntityRef,
43    /// Role this item plays in the evaluation.
44    pub role: EvidenceRole,
45    /// Bounded summary safe for evaluator prompts and logs.
46    pub redacted_summary: String,
47    /// Privacy class for projection and storage decisions.
48    pub privacy_class: PrivacyClass,
49    /// Retention class for downstream storage decisions.
50    pub retention_class: RetentionClass,
51    /// Refs this item was derived from.
52    pub derived_from: Vec<EntityRef>,
53}
54
55impl EvidenceItem {
56    /// Creates an evidence item with content-ref-only privacy defaults.
57    pub fn new(
58        evidence_ref: EntityRef,
59        role: EvidenceRole,
60        redacted_summary: impl Into<String>,
61    ) -> Self {
62        Self {
63            evidence_ref,
64            role,
65            redacted_summary: redacted_summary.into(),
66            privacy_class: PrivacyClass::ContentRefsOnly,
67            retention_class: RetentionClass::RunScoped,
68            derived_from: Vec::new(),
69        }
70    }
71}
72
73#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
74/// Result of validating evaluator-cited support refs against available evidence.
75pub struct SupportRefValidation {
76    /// Cited refs that matched an available evidence item.
77    pub accepted_refs: Vec<EntityRef>,
78    /// Cited refs that were not available to the evaluator.
79    pub rejected_refs: Vec<EntityRef>,
80}
81
82#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
83/// Evidence supplied to an evaluator.
84pub struct EvidenceBundle {
85    /// Scope this bundle covers.
86    pub scope: EvaluationScope,
87    /// Evidence refs available for cited support.
88    pub items: Vec<EvidenceItem>,
89    /// Optional outcome ref being evaluated.
90    pub outcome_ref: Option<EntityRef>,
91    /// Bounded bundle summary safe for prompts and logs.
92    pub redacted_summary: String,
93    /// Policy refs that governed this evidence projection.
94    pub policy_refs: Vec<PolicyRef>,
95    /// Privacy class for the bundle.
96    pub privacy_class: PrivacyClass,
97    /// Retention class for the bundle.
98    pub retention_class: RetentionClass,
99}
100
101impl EvidenceBundle {
102    /// Creates an empty evidence bundle.
103    pub fn new(scope: EvaluationScope, redacted_summary: impl Into<String>) -> Self {
104        Self {
105            scope,
106            items: Vec::new(),
107            outcome_ref: None,
108            redacted_summary: redacted_summary.into(),
109            policy_refs: Vec::new(),
110            privacy_class: PrivacyClass::ContentRefsOnly,
111            retention_class: RetentionClass::RunScoped,
112        }
113    }
114
115    /// Builds an evidence bundle from a core turn trace.
116    pub fn from_turn_trace(trace: &TurnTrace) -> Result<Self, AgentError> {
117        let turn_id = trace.turn_id.clone().ok_or_else(|| {
118            AgentError::contract_violation("turn trace is missing turn id for evaluation")
119        })?;
120        let mut bundle = Self::new(
121            EvaluationScope::Turn {
122                session_id: trace.session_id.clone(),
123                turn_id: turn_id.clone(),
124            },
125            "turn trace evidence",
126        );
127        bundle.outcome_ref = trace.run_ids.first().cloned().map(EntityRef::run);
128        bundle.push(EvidenceItem::new(
129            EntityRef::new(EntityKind::Turn, turn_id),
130            EvidenceRole::Input,
131            "turn envelope",
132        ));
133        for run_id in &trace.run_ids {
134            bundle.push(EvidenceItem::new(
135                EntityRef::run(run_id.clone()),
136                EvidenceRole::Output,
137                "run associated with turn",
138            ));
139        }
140        for attempt_id in &trace.attempt_ids {
141            bundle.push(EvidenceItem::new(
142                EntityRef::new(EntityKind::Attempt, attempt_id.clone()),
143                EvidenceRole::Model,
144                "model attempt",
145            ));
146        }
147        for message_id in &trace.message_ids {
148            bundle.push(EvidenceItem::new(
149                EntityRef::message(message_id.clone()),
150                EvidenceRole::Input,
151                "message envelope",
152            ));
153        }
154        for projection_id in &trace.context_projection_ids {
155            bundle.push(EvidenceItem::new(
156                EntityRef::new(EntityKind::ContextProjection, projection_id.clone()),
157                EvidenceRole::Context,
158                "context projection",
159            ));
160        }
161        for effect_id in &trace.effect_ids {
162            bundle.push(EvidenceItem::new(
163                EntityRef::new(EntityKind::Effect, effect_id.clone()),
164                EvidenceRole::Effect,
165                "effect evidence",
166            ));
167        }
168        for tool_call_id in &trace.tool_call_ids {
169            bundle.push(EvidenceItem::new(
170                EntityRef::new(EntityKind::ToolCall, tool_call_id.clone()),
171                EvidenceRole::Tool,
172                "tool call evidence",
173            ));
174        }
175        Ok(bundle)
176    }
177
178    /// Builds an evidence bundle from a core run trace.
179    pub fn from_run_trace(trace: &RunTrace) -> Result<Self, AgentError> {
180        let run_id = trace.run_id.clone().ok_or_else(|| {
181            AgentError::contract_violation("run trace is missing run id for evaluation")
182        })?;
183        let mut bundle = Self::new(
184            EvaluationScope::Run {
185                run_id: run_id.clone(),
186            },
187            "run trace evidence",
188        );
189        bundle.outcome_ref = Some(EntityRef::run(run_id.clone()));
190        bundle.push(EvidenceItem::new(
191            EntityRef::run(run_id),
192            EvidenceRole::Output,
193            "run envelope",
194        ));
195        for turn in &trace.turn_traces {
196            let turn_bundle = Self::from_turn_trace(turn)?;
197            for item in turn_bundle.items {
198                bundle.push(item);
199            }
200        }
201        Ok(bundle)
202    }
203
204    /// Builds an evidence bundle from a core session timeline.
205    pub fn from_session_timeline(timeline: &SessionTimeline) -> Result<Self, AgentError> {
206        let mut bundle = Self::new(
207            EvaluationScope::Session {
208                session_id: timeline.session_id.clone(),
209            },
210            "session timeline evidence",
211        );
212        for turn in &timeline.turns {
213            let turn_bundle = Self::from_turn_trace(turn)?;
214            if bundle.outcome_ref.is_none() {
215                bundle.outcome_ref = turn_bundle.outcome_ref.clone();
216            }
217            for item in turn_bundle.items {
218                bundle.push(item);
219            }
220        }
221        Ok(bundle)
222    }
223
224    /// Returns this bundle with an item appended, deduped by entity ref.
225    pub fn with_item(mut self, item: EvidenceItem) -> Self {
226        self.push(item);
227        self
228    }
229
230    /// Validates cited support refs against this bundle.
231    pub fn validate_support_refs(
232        &self,
233        support_refs: impl IntoIterator<Item = EntityRef>,
234        max_support_refs: usize,
235    ) -> SupportRefValidation {
236        let mut accepted_refs = Vec::new();
237        let mut rejected_refs = Vec::new();
238        for cited_ref in support_refs.into_iter().take(max_support_refs) {
239            if let Some(available_ref) = self
240                .items
241                .iter()
242                .map(|item| &item.evidence_ref)
243                .find(|available_ref| same_entity_ref(available_ref, &cited_ref))
244            {
245                push_unique(&mut accepted_refs, available_ref.clone());
246            } else {
247                push_unique(&mut rejected_refs, cited_ref);
248            }
249        }
250        SupportRefValidation {
251            accepted_refs,
252            rejected_refs,
253        }
254    }
255
256    fn push(&mut self, item: EvidenceItem) {
257        if !self
258            .items
259            .iter()
260            .any(|existing| same_entity_ref(&existing.evidence_ref, &item.evidence_ref))
261        {
262            self.items.push(item);
263        }
264    }
265}
266
267fn push_unique(items: &mut Vec<EntityRef>, value: EntityRef) {
268    if !items
269        .iter()
270        .any(|existing| same_entity_ref(existing, &value))
271    {
272        items.push(value);
273    }
274}
275
276pub(crate) fn same_entity_ref(left: &EntityRef, right: &EntityRef) -> bool {
277    left.kind == right.kind && left.id.as_str() == right.id.as_str()
278}