swink_agent_eval/evaluators/
safety.rs1#![forbid(unsafe_code)]
16#![cfg(feature = "evaluator-safety")]
17
18use std::sync::Arc;
19
20use serde::{Deserialize, Serialize};
21
22use crate::aggregator::AllPass;
23use crate::evaluator::Evaluator;
24use crate::types::{EvalCase, EvalMetricResult, Invocation};
25
26use super::{JudgeEvaluatorConfig, build_prompt_context, evaluate_with_builtin};
27
28fn has_final_response(_case: &EvalCase, invocation: &Invocation) -> bool {
29 invocation
30 .final_response
31 .as_deref()
32 .is_some_and(|s| !s.trim().is_empty())
33}
34
35fn has_user_prompt(case: &EvalCase, _invocation: &Invocation) -> bool {
36 !case.user_messages.is_empty()
37}
38
39fn with_safety_default(config: JudgeEvaluatorConfig) -> JudgeEvaluatorConfig {
41 if config.aggregator.is_some() {
42 config
43 } else {
44 config.with_aggregator(Arc::new(AllPass))
45 }
46}
47
// Generates one judge-backed safety evaluator type.
//
// Arguments, in order:
//   * optional attributes (typically doc comments) applied to the struct,
//   * `$name`      - identifier of the generated struct,
//   * `$eval_name` - string literal returned by `Evaluator::name` and passed
//                    to `evaluate_with_builtin`,
//   * `$template`  - string literal naming the built-in template passed to
//                    `evaluate_with_builtin`,
//   * `$criterion` - expression coercible to
//                    `fn(&EvalCase, &Invocation) -> bool`; when it returns
//                    `false`, `evaluate` yields `None`.
macro_rules! safety_evaluator {
    (
        $(#[$meta:meta])*
        $name:ident, $eval_name:literal, $template:literal, $criterion:expr
    ) => {
        $(#[$meta])*
        pub struct $name {
            /// Judge configuration used for every evaluation.
            config: JudgeEvaluatorConfig,
        }

        impl $name {
            /// Creates the evaluator from `config`, after passing it through
            /// `with_safety_default`.
            #[must_use]
            pub fn new(config: JudgeEvaluatorConfig) -> Self {
                let config = with_safety_default(config);
                Self { config }
            }

            /// Forwards `template` to the config's `with_prompt`.
            #[must_use]
            pub fn with_prompt(
                self,
                template: Arc<dyn crate::prompt::JudgePromptTemplate>,
            ) -> Self {
                Self {
                    config: self.config.with_prompt(template),
                }
            }

            /// Forwards `examples` to the config's `with_few_shot`.
            #[must_use]
            pub fn with_few_shot(self, examples: Vec<crate::types::FewShotExample>) -> Self {
                Self {
                    config: self.config.with_few_shot(examples),
                }
            }

            /// Forwards `prompt` to the config's `with_system_prompt`.
            #[must_use]
            pub fn with_system_prompt(self, prompt: impl Into<String>) -> Self {
                Self {
                    config: self.config.with_system_prompt(prompt),
                }
            }

            /// Forwards `schema` to the config's `with_output_schema`.
            #[must_use]
            pub fn with_output_schema(self, schema: serde_json::Value) -> Self {
                Self {
                    config: self.config.with_output_schema(schema),
                }
            }

            /// Forwards `flag` to the config's `with_use_reasoning`.
            #[must_use]
            pub fn with_use_reasoning(self, flag: bool) -> Self {
                Self {
                    config: self.config.with_use_reasoning(flag),
                }
            }

            /// Forwards `key` to the config's `with_feedback_key`.
            #[must_use]
            pub fn with_feedback_key(self, key: impl Into<String>) -> Self {
                Self {
                    config: self.config.with_feedback_key(key),
                }
            }

            /// Borrows the judge configuration currently in effect.
            #[must_use]
            pub const fn config(&self) -> &JudgeEvaluatorConfig {
                &self.config
            }
        }

        impl $crate::evaluators::JudgeEvaluatorBuilder for $name {
            fn judge_config_mut(&mut self) -> &mut JudgeEvaluatorConfig {
                &mut self.config
            }
        }

        impl Evaluator for $name {
            fn name(&self) -> &'static str {
                $eval_name
            }

            fn evaluate(
                &self,
                case: &EvalCase,
                invocation: &Invocation,
            ) -> Option<EvalMetricResult> {
                // Pin the criterion to a plain fn pointer so every expansion
                // type-checks against the same signature.
                let applies: fn(&EvalCase, &Invocation) -> bool = $criterion;

                // The prompt context is only built when the criterion holds.
                applies(case, invocation).then(|| {
                    let ctx = build_prompt_context(&self.config, case, invocation);
                    evaluate_with_builtin($eval_name, $template, &self.config, &ctx)
                })
            }
        }
    };
}
150
151safety_evaluator! {
152 HarmfulnessEvaluator,
154 "harmfulness",
155 "harmfulness_v0",
156 has_final_response
157}
158
159safety_evaluator! {
160 ToxicityEvaluator,
163 "toxicity",
164 "toxicity_v0",
165 has_final_response
166}
167
168safety_evaluator! {
169 FairnessEvaluator,
171 "fairness",
172 "fairness_v0",
173 has_final_response
174}
175
176safety_evaluator! {
177 PromptInjectionEvaluator,
181 "prompt_injection",
182 "prompt_injection_v0",
183 has_user_prompt
184}
185
186safety_evaluator! {
187 CodeInjectionEvaluator,
190 "code_injection",
191 "code_injection_v0",
192 has_user_prompt
193}
194
195#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
200#[serde(rename_all = "snake_case")]
201pub enum PIIClass {
202 Email,
203 Phone,
204 Ssn,
206 CreditCard,
207 IpAddress,
208 ApiKey,
209 PersonalName,
210 Address,
211 Other(String),
213}
214
215impl PIIClass {
216 #[must_use]
218 pub fn canonical_name(&self) -> String {
219 match self {
220 Self::Email => "email".into(),
221 Self::Phone => "phone".into(),
222 Self::Ssn => "ssn".into(),
223 Self::CreditCard => "credit_card".into(),
224 Self::IpAddress => "ip_address".into(),
225 Self::ApiKey => "api_key".into(),
226 Self::PersonalName => "personal_name".into(),
227 Self::Address => "address".into(),
228 Self::Other(name) => name.clone(),
229 }
230 }
231
232 #[must_use]
235 pub fn all_builtin() -> Vec<Self> {
236 vec![
237 Self::Email,
238 Self::Phone,
239 Self::Ssn,
240 Self::CreditCard,
241 Self::IpAddress,
242 Self::ApiKey,
243 Self::PersonalName,
244 Self::Address,
245 ]
246 }
247}
248
249pub struct PIILeakageEvaluator {
254 config: JudgeEvaluatorConfig,
255 entity_classes: Vec<PIIClass>,
256}
257
258impl PIILeakageEvaluator {
259 #[must_use]
261 pub fn new(config: JudgeEvaluatorConfig) -> Self {
262 Self {
263 config: with_safety_default(config),
264 entity_classes: PIIClass::all_builtin(),
265 }
266 }
267
268 #[must_use]
272 pub fn with_classes(config: JudgeEvaluatorConfig, entity_classes: Vec<PIIClass>) -> Self {
273 Self {
274 config: with_safety_default(config),
275 entity_classes,
276 }
277 }
278
279 #[must_use]
281 pub fn with_prompt(mut self, template: Arc<dyn crate::prompt::JudgePromptTemplate>) -> Self {
282 self.config = self.config.with_prompt(template);
283 self
284 }
285
286 #[must_use]
289 pub fn with_few_shot(mut self, examples: Vec<crate::types::FewShotExample>) -> Self {
290 self.config = self.config.with_few_shot(examples);
291 self
292 }
293
294 #[must_use]
296 pub fn with_system_prompt(mut self, prompt: impl Into<String>) -> Self {
297 self.config = self.config.with_system_prompt(prompt);
298 self
299 }
300
301 #[must_use]
303 pub fn with_output_schema(mut self, schema: serde_json::Value) -> Self {
304 self.config = self.config.with_output_schema(schema);
305 self
306 }
307
308 #[must_use]
310 pub fn with_use_reasoning(mut self, flag: bool) -> Self {
311 self.config = self.config.with_use_reasoning(flag);
312 self
313 }
314
315 #[must_use]
317 pub fn with_feedback_key(mut self, key: impl Into<String>) -> Self {
318 self.config = self.config.with_feedback_key(key);
319 self
320 }
321
322 #[must_use]
324 pub fn entity_classes(&self) -> &[PIIClass] {
325 &self.entity_classes
326 }
327
328 #[must_use]
330 pub const fn config(&self) -> &JudgeEvaluatorConfig {
331 &self.config
332 }
333}
334
335impl crate::evaluators::JudgeEvaluatorBuilder for PIILeakageEvaluator {
336 fn judge_config_mut(&mut self) -> &mut JudgeEvaluatorConfig {
337 &mut self.config
338 }
339}
340
341impl Evaluator for PIILeakageEvaluator {
342 fn name(&self) -> &'static str {
343 "pii_leakage"
344 }
345
346 fn evaluate(&self, case: &EvalCase, invocation: &Invocation) -> Option<EvalMetricResult> {
347 if !has_final_response(case, invocation) {
348 return None;
349 }
350
351 let mut ctx = build_prompt_context(&self.config, case, invocation);
355 let classes: Vec<serde_json::Value> = self
356 .entity_classes
357 .iter()
358 .map(|c| serde_json::Value::String(c.canonical_name()))
359 .collect();
360 ctx = ctx.with_custom(std::collections::HashMap::from([(
361 "pii_entity_classes".to_string(),
362 serde_json::Value::Array(classes),
363 )]));
364
365 Some(evaluate_with_builtin(
366 "pii_leakage",
367 "pii_leakage_v0",
368 &self.config,
369 &ctx,
370 ))
371 }
372}