use std::borrow::Cow;
use std::time::Duration;
use opentelemetry::global::BoxedTracer;
#[allow(unused_imports)]
use opentelemetry::trace::Span;
use opentelemetry::trace::{
SpanBuilder, SpanKind, Status, TraceContextExt, Tracer, TracerProvider,
};
use opentelemetry::{Context, KeyValue, global};
use crate::score::Verdict;
use crate::types::{EvalCase, EvalCaseResult, EvalMetricResult, EvalSet};
/// Name of the span that `EvalsTelemetry::start_run_set_span` opens.
pub const SPAN_RUN_SET: &str = "swink.eval.run_set";
/// Name of the span that `EvalsTelemetry::start_case_span_raw` opens.
pub const SPAN_CASE: &str = "swink.eval.case";
/// Name of the span that `EvalsTelemetry::start_evaluator_span` opens.
pub const SPAN_EVALUATOR: &str = "swink.eval.evaluator";
/// Attribute key for the eval set id; attached to run-set, case and evaluator spans.
pub const ATTR_SET_ID: &str = "swink.eval.set_id";
/// Attribute key for the eval set name; attached to the run-set span.
pub const ATTR_SET_NAME: &str = "swink.eval.set_name";
/// Attribute key for the number of cases in the set; attached to the run-set span.
pub const ATTR_CASE_COUNT: &str = "swink.eval.case_count";
/// Attribute key for the case id; attached to case and evaluator spans.
pub const ATTR_CASE_ID: &str = "swink.eval.case_id";
/// Attribute key for the case name; attached to the case span.
pub const ATTR_CASE_NAME: &str = "swink.eval.case_name";
/// Attribute key for the evaluator name; attached to the evaluator span.
pub const ATTR_EVALUATOR_NAME: &str = "swink.eval.evaluator_name";
/// Attribute key for the verdict string (`"pass"`, `"fail"` or `"inapplicable"`).
pub const ATTR_VERDICT: &str = "swink.eval.verdict";
/// Attribute key for the score value recorded when an evaluator span ends.
pub const ATTR_SCORE: &str = "swink.eval.score";
/// Attribute key for the score threshold recorded when an evaluator span ends.
pub const ATTR_SCORE_THRESHOLD: &str = "swink.eval.score_threshold";
/// Attribute key for the case duration in milliseconds; recorded when a case span ends.
pub const ATTR_DURATION_MS: &str = "swink.eval.duration_ms";
/// Attribute key for the count of passed cases; recorded when the run-set span ends.
pub const ATTR_PASSED: &str = "swink.eval.passed";
/// Attribute key for the count of failed cases; recorded when the run-set span ends.
pub const ATTR_FAILED: &str = "swink.eval.failed";
/// Owns the tracer used to create the run-set, case and evaluator spans.
///
/// Constructed through [`EvalsTelemetry::builder`].
pub struct EvalsTelemetry {
// Tracer that every span started by this type is built with.
tracer: BoxedTracer,
}
/// Manual `Debug` implementation that prints only the type name and marks
/// the remaining fields as elided (the tracer itself is not rendered).
impl std::fmt::Debug for EvalsTelemetry {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut rendering = f.debug_struct("EvalsTelemetry");
        rendering.finish_non_exhaustive()
    }
}
impl EvalsTelemetry {
#[must_use]
pub fn builder() -> EvalsTelemetryBuilder {
EvalsTelemetryBuilder::default()
}
#[must_use]
pub fn tracer(&self) -> &BoxedTracer {
&self.tracer
}
pub(crate) fn start_run_set_span(&self, eval_set: &EvalSet) -> RunSetSpan {
let parent = Context::current();
let builder = SpanBuilder::from_name(Cow::Borrowed(SPAN_RUN_SET))
.with_kind(SpanKind::Internal)
.with_attributes(vec![
KeyValue::new(ATTR_SET_ID, eval_set.id.clone()),
KeyValue::new(ATTR_SET_NAME, eval_set.name.clone()),
KeyValue::new(
ATTR_CASE_COUNT,
i64::try_from(eval_set.cases.len()).unwrap_or(i64::MAX),
),
]);
let span = self.tracer.build_with_context(builder, &parent);
let cx = parent.with_span(span);
RunSetSpan {
context: cx,
set_id: eval_set.id.clone(),
}
}
pub(crate) fn start_case_span_raw(&self, parent: &RunSetSpanRef, case: &EvalCase) -> CaseSpan {
let builder = SpanBuilder::from_name(Cow::Borrowed(SPAN_CASE))
.with_kind(SpanKind::Internal)
.with_attributes(vec![
KeyValue::new(ATTR_SET_ID, parent.set_id.clone()),
KeyValue::new(ATTR_CASE_ID, case.id.clone()),
KeyValue::new(ATTR_CASE_NAME, case.name.clone()),
]);
let span = self.tracer.build_with_context(builder, &parent.context);
let cx = parent.context.with_span(span);
CaseSpan {
context: cx,
set_id: parent.set_id.clone(),
case_id: case.id.clone(),
}
}
pub(crate) fn start_evaluator_span(
&self,
parent: &CaseSpan,
evaluator_name: &str,
) -> EvaluatorSpan {
let builder = SpanBuilder::from_name(Cow::Borrowed(SPAN_EVALUATOR))
.with_kind(SpanKind::Internal)
.with_attributes(vec![
KeyValue::new(ATTR_SET_ID, parent.set_id.clone()),
KeyValue::new(ATTR_CASE_ID, parent.case_id.clone()),
KeyValue::new(ATTR_EVALUATOR_NAME, evaluator_name.to_string()),
]);
let span = self.tracer.build_with_context(builder, &parent.context);
let cx = parent.context.with_span(span);
EvaluatorSpan { context: cx }
}
}
/// Handle for an open run-set span; finish it with [`RunSetSpan::end`].
pub(crate) struct RunSetSpan {
// Context that carries the run-set span.
context: Context,
// Id of the eval set this span was started for (not read within this file).
#[allow(dead_code)] set_id: String,
}
/// Cloneable context/set-id pair accepted by
/// `EvalsTelemetry::start_case_span_raw` as the parent of a case span.
#[derive(Clone)]
pub(crate) struct RunSetSpanRef {
// Context the case span is created under.
pub(crate) context: Context,
// Set id copied onto the case span and the resulting `CaseSpan`.
pub(crate) set_id: String,
}
impl RunSetSpan {
pub(crate) fn context(&self) -> &Context {
&self.context
}
pub(crate) fn end(self, passed: usize, failed: usize) {
let span = self.context.span();
span.set_attribute(KeyValue::new(
ATTR_PASSED,
i64::try_from(passed).unwrap_or(i64::MAX),
));
span.set_attribute(KeyValue::new(
ATTR_FAILED,
i64::try_from(failed).unwrap_or(i64::MAX),
));
if failed > 0 {
span.set_status(Status::error(format!("{failed} case(s) failed")));
} else {
span.set_status(Status::Ok);
}
span.end();
}
}
/// Handle for an open case span; finish it with [`CaseSpan::end`].
pub(crate) struct CaseSpan {
// Context that carries the case span; evaluator spans are parented to it.
context: Context,
// Set id copied onto evaluator spans started under this case.
set_id: String,
// Case id copied onto evaluator spans started under this case.
case_id: String,
}
impl CaseSpan {
    /// Borrows the context that carries the case span.
    #[allow(dead_code)]
    pub(crate) fn context(&self) -> &Context {
        &self.context
    }

    /// Records the verdict and duration on the span, sets its status and ends it.
    ///
    /// On a passing verdict the status is `Ok`. Otherwise an `exception`
    /// event and an error status are emitted, both carrying a message that
    /// lists every non-passing metric as `name` or `name: details`
    /// (joined with `" | "`). The duration is stored in whole milliseconds,
    /// saturated to `i64::MAX`.
    pub(crate) fn end(self, result: &EvalCaseResult, duration: Duration) {
        let span = self.context.span();
        span.set_attribute(KeyValue::new(ATTR_VERDICT, verdict_str(result.verdict)));

        let elapsed_ms = i64::try_from(duration.as_millis()).unwrap_or(i64::MAX);
        span.set_attribute(KeyValue::new(ATTR_DURATION_MS, elapsed_ms));

        if result.verdict.is_pass() {
            span.set_status(Status::Ok);
            span.end();
            return;
        }

        // One entry per failing metric; an absent or empty detail collapses
        // to just the evaluator name.
        let failing: Vec<String> = result
            .metric_results
            .iter()
            .filter(|m| !m.score.verdict().is_pass())
            .map(|m| match m.details.as_deref() {
                Some(detail) if !detail.is_empty() => {
                    format!("{}: {}", m.evaluator_name, detail)
                }
                _ => m.evaluator_name.clone(),
            })
            .collect();

        let mut message = format!("case `{}` failed", result.case_id);
        if !failing.is_empty() {
            message.push_str(": ");
            message.push_str(&failing.join(" | "));
        }

        let event_attributes = vec![
            KeyValue::new("exception.type", "EvalCaseFailure"),
            KeyValue::new("exception.message", message.clone()),
        ];
        span.add_event(Cow::Borrowed("exception"), event_attributes);
        span.set_status(Status::error(message));
        span.end();
    }
}
/// Handle for an open evaluator span; finish it with [`EvaluatorSpan::end`]
/// or [`EvaluatorSpan::end_inapplicable`].
pub(crate) struct EvaluatorSpan {
// Context that carries the evaluator span.
context: Context,
}
impl EvaluatorSpan {
    /// Records the metric outcome on the span, sets its status and ends it.
    ///
    /// The verdict, score value and score threshold are always recorded;
    /// `swink.eval.details` is recorded whenever the metric carries details.
    /// A passing verdict yields an `Ok` status. A failing verdict emits an
    /// `exception` event and an error status whose message is the metric's
    /// details, or "evaluator `<name>` failed" when the details are absent
    /// or empty.
    pub(crate) fn end(self, metric: &EvalMetricResult) {
        let span = self.context.span();
        let verdict = metric.score.verdict();
        span.set_attribute(KeyValue::new(ATTR_VERDICT, verdict_str(verdict)));
        span.set_attribute(KeyValue::new(ATTR_SCORE, metric.score.value));
        span.set_attribute(KeyValue::new(ATTR_SCORE_THRESHOLD, metric.score.threshold));
        if let Some(detail) = &metric.details {
            span.set_attribute(KeyValue::new("swink.eval.details", detail.clone()));
        }

        if verdict.is_pass() {
            span.set_status(Status::Ok);
        } else {
            // An empty details string would produce a blank exception message
            // and status description, so it is treated the same as missing
            // details (as `CaseSpan::end` does for per-metric details).
            let message = metric
                .details
                .as_deref()
                .filter(|detail| !detail.is_empty())
                .map_or_else(
                    || format!("evaluator `{}` failed", metric.evaluator_name),
                    str::to_owned,
                );
            span.add_event(
                Cow::Borrowed("exception"),
                vec![
                    KeyValue::new("exception.type", "EvaluatorFailure"),
                    KeyValue::new("exception.message", message.clone()),
                ],
            );
            span.set_status(Status::error(message));
        }
        span.end();
    }

    /// Ends the span for an evaluator that did not apply to the case.
    ///
    /// Records `evaluator_name`, sets the verdict attribute to
    /// `"inapplicable"` and finishes the span with an `Ok` status.
    pub(crate) fn end_inapplicable(self, evaluator_name: &str) {
        let span = self.context.span();
        span.set_attribute(KeyValue::new(
            ATTR_EVALUATOR_NAME,
            evaluator_name.to_string(),
        ));
        span.set_attribute(KeyValue::new(ATTR_VERDICT, "inapplicable"));
        span.set_status(Status::Ok);
        span.end();
    }
}
/// Maps a verdict to the string stored in the verdict span attribute:
/// `"pass"` when `verdict.is_pass()` holds, `"fail"` otherwise.
fn verdict_str(verdict: Verdict) -> &'static str {
    if !verdict.is_pass() {
        return "fail";
    }
    "pass"
}
/// Builder for [`EvalsTelemetry`]; starts with no tracer selected.
#[derive(Default)]
pub struct EvalsTelemetryBuilder {
// Explicitly supplied tracer; `build` falls back to the global tracer when `None`.
tracer: Option<BoxedTracer>,
}
impl EvalsTelemetryBuilder {
    /// Uses `tracer` for all spans, replacing any tracer chosen earlier.
    #[must_use]
    pub fn with_tracer(self, tracer: BoxedTracer) -> Self {
        Self {
            tracer: Some(tracer),
        }
    }

    /// Obtains a tracer named `"swink.eval"` from `provider`, boxes it and
    /// uses it for all spans, replacing any tracer chosen earlier.
    #[must_use]
    pub fn with_tracer_provider<S, T, P>(self, provider: &P) -> Self
    where
        S: opentelemetry::trace::Span + Send + Sync + 'static,
        T: Tracer<Span = S> + Send + Sync + 'static,
        P: TracerProvider<Tracer = T>,
    {
        let boxed = BoxedTracer::new(Box::new(provider.tracer("swink.eval")));
        self.with_tracer(boxed)
    }

    /// Finishes the builder, falling back to the global `"swink.eval"`
    /// tracer when no tracer was supplied.
    #[must_use]
    pub fn build(self) -> EvalsTelemetry {
        let tracer = match self.tracer {
            Some(chosen) => chosen,
            None => global::tracer("swink.eval"),
        };
        EvalsTelemetry { tracer }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use opentelemetry_sdk::trace::{InMemorySpanExporter, SdkTracerProvider};

    /// Builds an SDK tracer provider wired to an in-memory exporter and
    /// returns both so tests can inspect the finished spans.
    fn fresh_provider() -> (SdkTracerProvider, InMemorySpanExporter) {
        let sink = InMemorySpanExporter::default();
        let provider = SdkTracerProvider::builder()
            .with_simple_exporter(sink.clone())
            .build();
        (provider, sink)
    }

    /// A span started through the built telemetry's tracer must reach the
    /// exporter attached to the injected provider.
    #[test]
    fn builder_uses_injected_tracer() {
        let (provider, exporter) = fresh_provider();
        let telemetry = EvalsTelemetry::builder()
            .with_tracer_provider(&provider)
            .build();

        let mut probe = telemetry.tracer().start("selftest");
        probe.end();
        provider.force_flush().expect("flush ok");

        let finished = exporter.get_finished_spans().expect("get spans");
        let exported = finished.iter().any(|span| span.name == "selftest");
        assert!(exported);
    }

    /// `verdict_str` renders the two verdicts as `"pass"` and `"fail"`.
    #[test]
    fn verdict_str_rendering() {
        for (verdict, expected) in [(Verdict::Pass, "pass"), (Verdict::Fail, "fail")] {
            assert_eq!(verdict_str(verdict), expected);
        }
    }
}