// agent_sdk_eval/comparison.rs
use serde::{Deserialize, Serialize};

use agent_sdk_core::EntityRef;

use crate::EvaluationScope;

9#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
10#[serde(tag = "design", rename_all = "snake_case")]
11pub enum ComparisonDesign {
13 #[default]
15 ObservedOnly,
16 ExpectedOutcome,
18 BaselineRun {
20 baseline_ref: EntityRef,
22 },
23 PairedRuns {
25 observed_ref: EntityRef,
27 comparison_ref: EntityRef,
29 },
30 PairedScopes {
32 observed_scope: EvaluationScope,
34 comparison_scope: EvaluationScope,
36 },
37 Ablation {
39 removed_refs: Vec<EntityRef>,
41 },
42 Counterfactual {
44 redacted_summary: String,
46 },
47 RepeatedExperiment {
49 cohort_ref: EntityRef,
51 },
52}
53
54impl ComparisonDesign {
55 pub fn supports_measured_confidence(&self) -> bool {
58 matches!(
59 self,
60 Self::BaselineRun { .. }
61 | Self::PairedRuns { .. }
62 | Self::PairedScopes { .. }
63 | Self::Ablation { .. }
64 | Self::RepeatedExperiment { .. }
65 )
66 }
67
68 pub fn comparison_refs(&self) -> Vec<EntityRef> {
70 match self {
71 Self::ObservedOnly | Self::ExpectedOutcome | Self::Counterfactual { .. } => Vec::new(),
72 Self::BaselineRun { baseline_ref } => vec![baseline_ref.clone()],
73 Self::PairedRuns {
74 observed_ref,
75 comparison_ref,
76 } => vec![observed_ref.clone(), comparison_ref.clone()],
77 Self::PairedScopes { .. } => Vec::new(),
78 Self::Ablation { removed_refs } => removed_refs.clone(),
79 Self::RepeatedExperiment { cohort_ref } => vec![cohort_ref.clone()],
80 }
81 }
82
83 pub fn has_comparison_evidence(&self) -> bool {
85 matches!(self, Self::PairedScopes { .. }) || !self.comparison_refs().is_empty()
86 }
87}