1use std::borrow::Cow;
40use std::time::Duration;
41
42use opentelemetry::global::BoxedTracer;
43#[allow(unused_imports)]
46use opentelemetry::trace::Span;
47use opentelemetry::trace::{
48 SpanBuilder, SpanKind, Status, TraceContextExt, Tracer, TracerProvider,
49};
50use opentelemetry::{Context, KeyValue, global};
51
52use crate::score::Verdict;
53use crate::types::{EvalCase, EvalCaseResult, EvalMetricResult, EvalSet};
54
/// Name of the span that covers one run of an entire eval set.
pub const SPAN_RUN_SET: &str = "swink.eval.run_set";
/// Name of the span that covers a single eval case.
pub const SPAN_CASE: &str = "swink.eval.case";
/// Name of the span that covers a single evaluator invocation.
pub const SPAN_EVALUATOR: &str = "swink.eval.evaluator";

/// Attribute key for the eval set id; attached to run-set, case and evaluator spans.
pub const ATTR_SET_ID: &str = "swink.eval.set_id";
/// Attribute key for the eval set name; attached to the run-set span.
pub const ATTR_SET_NAME: &str = "swink.eval.set_name";
/// Attribute key for the number of cases in the set; attached to the run-set span.
pub const ATTR_CASE_COUNT: &str = "swink.eval.case_count";
/// Attribute key for the case id; attached to case and evaluator spans.
pub const ATTR_CASE_ID: &str = "swink.eval.case_id";
/// Attribute key for the case name; attached to the case span.
pub const ATTR_CASE_NAME: &str = "swink.eval.case_name";
/// Attribute key for the evaluator name; attached to the evaluator span.
pub const ATTR_EVALUATOR_NAME: &str = "swink.eval.evaluator_name";
/// Attribute key for the verdict string (`"pass"`, `"fail"` or `"inapplicable"`);
/// recorded when a case or evaluator span ends.
pub const ATTR_VERDICT: &str = "swink.eval.verdict";
/// Attribute key for the metric score value; recorded when an evaluator span ends.
pub const ATTR_SCORE: &str = "swink.eval.score";
/// Attribute key for the metric score threshold; recorded when an evaluator span ends.
pub const ATTR_SCORE_THRESHOLD: &str = "swink.eval.score_threshold";
/// Attribute key for the case duration in whole milliseconds; recorded when a case span ends.
pub const ATTR_DURATION_MS: &str = "swink.eval.duration_ms";
/// Attribute key for the number of passed cases; recorded when the run-set span ends.
pub const ATTR_PASSED: &str = "swink.eval.passed";
/// Attribute key for the number of failed cases; recorded when the run-set span ends.
pub const ATTR_FAILED: &str = "swink.eval.failed";
88
/// Handle that owns the tracer used to emit eval spans.
///
/// Obtain one through [`EvalsTelemetry::builder`].
pub struct EvalsTelemetry {
    /// Tracer that every span-starting method on this type builds its spans with.
    tracer: BoxedTracer,
}
101
102impl std::fmt::Debug for EvalsTelemetry {
103 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104 f.debug_struct("EvalsTelemetry").finish_non_exhaustive()
105 }
106}
107
108impl EvalsTelemetry {
109 #[must_use]
111 pub fn builder() -> EvalsTelemetryBuilder {
112 EvalsTelemetryBuilder::default()
113 }
114
115 #[must_use]
118 pub fn tracer(&self) -> &BoxedTracer {
119 &self.tracer
120 }
121
122 pub(crate) fn start_run_set_span(&self, eval_set: &EvalSet) -> RunSetSpan {
129 let parent = Context::current();
130 let builder = SpanBuilder::from_name(Cow::Borrowed(SPAN_RUN_SET))
131 .with_kind(SpanKind::Internal)
132 .with_attributes(vec![
133 KeyValue::new(ATTR_SET_ID, eval_set.id.clone()),
134 KeyValue::new(ATTR_SET_NAME, eval_set.name.clone()),
135 KeyValue::new(
136 ATTR_CASE_COUNT,
137 i64::try_from(eval_set.cases.len()).unwrap_or(i64::MAX),
138 ),
139 ]);
140 let span = self.tracer.build_with_context(builder, &parent);
141 let cx = parent.with_span(span);
142 RunSetSpan {
143 context: cx,
144 set_id: eval_set.id.clone(),
145 }
146 }
147
148 pub(crate) fn start_case_span_raw(&self, parent: &RunSetSpanRef, case: &EvalCase) -> CaseSpan {
154 let builder = SpanBuilder::from_name(Cow::Borrowed(SPAN_CASE))
155 .with_kind(SpanKind::Internal)
156 .with_attributes(vec![
157 KeyValue::new(ATTR_SET_ID, parent.set_id.clone()),
158 KeyValue::new(ATTR_CASE_ID, case.id.clone()),
159 KeyValue::new(ATTR_CASE_NAME, case.name.clone()),
160 ]);
161 let span = self.tracer.build_with_context(builder, &parent.context);
162 let cx = parent.context.with_span(span);
163 CaseSpan {
164 context: cx,
165 set_id: parent.set_id.clone(),
166 case_id: case.id.clone(),
167 }
168 }
169
170 pub(crate) fn start_evaluator_span(
172 &self,
173 parent: &CaseSpan,
174 evaluator_name: &str,
175 ) -> EvaluatorSpan {
176 let builder = SpanBuilder::from_name(Cow::Borrowed(SPAN_EVALUATOR))
177 .with_kind(SpanKind::Internal)
178 .with_attributes(vec![
179 KeyValue::new(ATTR_SET_ID, parent.set_id.clone()),
180 KeyValue::new(ATTR_CASE_ID, parent.case_id.clone()),
181 KeyValue::new(ATTR_EVALUATOR_NAME, evaluator_name.to_string()),
182 ]);
183 let span = self.tracer.build_with_context(builder, &parent.context);
184 let cx = parent.context.with_span(span);
185 EvaluatorSpan { context: cx }
186 }
187}
188
/// Handle for an in-flight run-set span; finish it with [`RunSetSpan::end`].
pub(crate) struct RunSetSpan {
    /// Context that contains the run-set span.
    context: Context,
    /// Id of the eval set this span belongs to. Not read by any code visible in
    /// this file, hence the `dead_code` allowance.
    #[allow(dead_code)] set_id: String,
}
197
/// Cloneable pairing of a parent context and an eval set id.
///
/// This is the parent handle accepted by `EvalsTelemetry::start_case_span_raw`.
#[derive(Clone)]
pub(crate) struct RunSetSpanRef {
    /// Context that case spans are parented to.
    pub(crate) context: Context,
    /// Set id copied onto each case span started from this handle.
    pub(crate) set_id: String,
}
208
209impl RunSetSpan {
210 pub(crate) fn context(&self) -> &Context {
211 &self.context
212 }
213
214 pub(crate) fn end(self, passed: usize, failed: usize) {
216 let span = self.context.span();
217 span.set_attribute(KeyValue::new(
218 ATTR_PASSED,
219 i64::try_from(passed).unwrap_or(i64::MAX),
220 ));
221 span.set_attribute(KeyValue::new(
222 ATTR_FAILED,
223 i64::try_from(failed).unwrap_or(i64::MAX),
224 ));
225 if failed > 0 {
226 span.set_status(Status::error(format!("{failed} case(s) failed")));
227 } else {
228 span.set_status(Status::Ok);
229 }
230 span.end();
231 }
232}
233
/// Handle for an in-flight case span; finish it with [`CaseSpan::end`].
pub(crate) struct CaseSpan {
    /// Context that contains the case span; evaluator spans are parented to it.
    context: Context,
    /// Set id repeated on evaluator spans started under this case.
    set_id: String,
    /// Case id repeated on evaluator spans started under this case.
    case_id: String,
}
240
241impl CaseSpan {
242 #[allow(dead_code)]
245 pub(crate) fn context(&self) -> &Context {
246 &self.context
247 }
248
249 pub(crate) fn end(self, result: &EvalCaseResult, duration: Duration) {
253 let span = self.context.span();
254 span.set_attribute(KeyValue::new(ATTR_VERDICT, verdict_str(result.verdict)));
255 #[allow(clippy::cast_possible_truncation)]
256 span.set_attribute(KeyValue::new(
257 ATTR_DURATION_MS,
258 duration.as_millis().min(i64::MAX as u128) as i64,
259 ));
260
261 if result.verdict.is_pass() {
262 span.set_status(Status::Ok);
263 } else {
264 let failing: Vec<String> = result
265 .metric_results
266 .iter()
267 .filter(|m| !m.score.verdict().is_pass())
268 .map(|m| {
269 let detail = m.details.clone().unwrap_or_default();
270 if detail.is_empty() {
271 m.evaluator_name.clone()
272 } else {
273 format!("{}: {}", m.evaluator_name, detail)
274 }
275 })
276 .collect();
277 let message = if failing.is_empty() {
278 format!("case `{}` failed", result.case_id)
279 } else {
280 format!("case `{}` failed: {}", result.case_id, failing.join(" | "))
281 };
282 span.add_event(
283 Cow::Borrowed("exception"),
284 vec![
285 KeyValue::new("exception.type", "EvalCaseFailure"),
286 KeyValue::new("exception.message", message.clone()),
287 ],
288 );
289 span.set_status(Status::error(message));
290 }
291 span.end();
292 }
293}
294
/// Handle for an in-flight evaluator span; finish it with
/// [`EvaluatorSpan::end`] or [`EvaluatorSpan::end_inapplicable`].
pub(crate) struct EvaluatorSpan {
    /// Context that contains the evaluator span.
    context: Context,
}
299
300impl EvaluatorSpan {
301 pub(crate) fn end(self, metric: &EvalMetricResult) {
305 let span = self.context.span();
306 let verdict = metric.score.verdict();
307 span.set_attribute(KeyValue::new(ATTR_VERDICT, verdict_str(verdict)));
308 span.set_attribute(KeyValue::new(ATTR_SCORE, metric.score.value));
309 span.set_attribute(KeyValue::new(ATTR_SCORE_THRESHOLD, metric.score.threshold));
310 if let Some(detail) = &metric.details {
311 span.set_attribute(KeyValue::new("swink.eval.details", detail.clone()));
312 }
313 if verdict.is_pass() {
314 span.set_status(Status::Ok);
315 } else {
316 let message = metric
317 .details
318 .clone()
319 .unwrap_or_else(|| format!("evaluator `{}` failed", metric.evaluator_name));
320 span.add_event(
321 Cow::Borrowed("exception"),
322 vec![
323 KeyValue::new("exception.type", "EvaluatorFailure"),
324 KeyValue::new("exception.message", message.clone()),
325 ],
326 );
327 span.set_status(Status::error(message));
328 }
329 span.end();
330 }
331
332 pub(crate) fn end_inapplicable(self, evaluator_name: &str) {
335 let span = self.context.span();
336 span.set_attribute(KeyValue::new(
337 ATTR_EVALUATOR_NAME,
338 evaluator_name.to_string(),
339 ));
340 span.set_attribute(KeyValue::new(ATTR_VERDICT, "inapplicable"));
341 span.set_status(Status::Ok);
342 span.end();
343 }
344}
345
346fn verdict_str(verdict: Verdict) -> &'static str {
347 if verdict.is_pass() { "pass" } else { "fail" }
348}
349
/// Builder for [`EvalsTelemetry`].
///
/// When no tracer is supplied, [`EvalsTelemetryBuilder::build`] obtains one
/// from the global tracer provider.
#[derive(Default)]
pub struct EvalsTelemetryBuilder {
    /// Explicitly injected tracer, if any.
    tracer: Option<BoxedTracer>,
}
363
364impl EvalsTelemetryBuilder {
365 #[must_use]
369 pub fn with_tracer(mut self, tracer: BoxedTracer) -> Self {
370 self.tracer = Some(tracer);
371 self
372 }
373
374 #[must_use]
377 pub fn with_tracer_provider<S, T, P>(mut self, provider: &P) -> Self
378 where
379 S: opentelemetry::trace::Span + Send + Sync + 'static,
380 T: Tracer<Span = S> + Send + Sync + 'static,
381 P: TracerProvider<Tracer = T>,
382 {
383 let tracer = provider.tracer("swink.eval");
386 self.tracer = Some(BoxedTracer::new(Box::new(tracer)));
387 self
388 }
389
390 #[must_use]
393 pub fn build(self) -> EvalsTelemetry {
394 let tracer = self.tracer.unwrap_or_else(|| global::tracer("swink.eval"));
395 EvalsTelemetry { tracer }
396 }
397}
398
#[cfg(test)]
mod tests {
    use super::*;
    use opentelemetry_sdk::trace::{InMemorySpanExporter, SdkTracerProvider};

    /// Builds an SDK tracer provider with a simple exporter backed by an
    /// in-memory span exporter, returning both so a test can inspect what was
    /// exported.
    fn fresh_provider() -> (SdkTracerProvider, InMemorySpanExporter) {
        let exporter = InMemorySpanExporter::default();
        let provider = SdkTracerProvider::builder()
            .with_simple_exporter(exporter.clone())
            .build();
        (provider, exporter)
    }

    /// A span started through the telemetry's tracer must show up in the
    /// exporter attached to the injected provider.
    #[test]
    fn builder_uses_injected_tracer() {
        let (provider, exporter) = fresh_provider();
        let telemetry = EvalsTelemetry::builder()
            .with_tracer_provider(&provider)
            .build();
        let mut span = telemetry.tracer().start("selftest");
        // End the span before flushing so it counts as finished.
        span.end();
        provider.force_flush().expect("flush ok");
        let spans = exporter.get_finished_spans().expect("get spans");
        assert!(spans.iter().any(|s| s.name == "selftest"));
    }

    /// `verdict_str` renders the two verdict variants as `"pass"` and `"fail"`.
    #[test]
    fn verdict_str_rendering() {
        assert_eq!(verdict_str(Verdict::Pass), "pass");
        assert_eq!(verdict_str(Verdict::Fail), "fail");
    }
}