use std::sync::{Arc, RwLock};
use opentelemetry::global;
use opentelemetry::metrics::{Counter, Histogram};
/// Explicit histogram boundaries for token-count histograms.
///
/// The boundaries are the powers of four from `4^0` (1) through `4^13`
/// (67,108,864). `Metrics::build` applies them to the token-usage and
/// compaction-tokens-saved histograms.
const TOKEN_USAGE_BUCKETS: &[f64] = &[
    // 4^0 ..= 4^3
    1.0, 4.0, 16.0, 64.0,
    // 4^4 ..= 4^7
    256.0, 1024.0, 4096.0, 16_384.0,
    // 4^8 ..= 4^11
    65_536.0, 262_144.0, 1_048_576.0, 4_194_304.0,
    // 4^12 ..= 4^13
    16_777_216.0, 67_108_864.0,
];
/// Explicit histogram boundaries, in seconds, for the duration histograms that
/// `Metrics::build` creates with unit `"s"`.
///
/// Each boundary is double the previous one, from 0.01 s up to 81.92 s.
const SHORT_DURATION_BUCKETS_S: &[f64] = &[
    // 10 ms up to 640 ms
    0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64,
    // 1.28 s up to 81.92 s
    1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92,
];
/// Explicit histogram boundaries, in milliseconds, for the tool-execution
/// duration histogram that `Metrics::build` creates with unit `"ms"`.
const TOOL_DURATION_BUCKETS_MS: &[f64] = &[
    // below one second
    1.0, 5.0, 10.0, 50.0, 100.0, 500.0,
    // 1 s, 5 s, 10 s
    1_000.0, 5_000.0, 10_000.0,
    // 1 min, 5 min
    60_000.0, 300_000.0,
];
/// Bundle of the OpenTelemetry instruments used by this crate.
///
/// Every field is created in `Metrics::build`; the instrument name quoted on
/// each field is the one registered there.
#[derive(Debug)]
pub struct Metrics {
    /// `gen_ai.client.token.usage` histogram, unit `{token}`.
    pub(crate) token_usage: Histogram<u64>,
    /// `gen_ai.client.operation.duration` histogram, unit `s`.
    pub(crate) operation_duration: Histogram<f64>,
    /// `gen_ai.client.operation.time_to_first_chunk` histogram, unit `s`.
    pub(crate) time_to_first_chunk: Histogram<f64>,
    /// `gen_ai.client.operation.time_per_output_chunk` histogram, unit `s`.
    pub(crate) time_per_output_chunk: Histogram<f64>,
    /// `agent_sdk.turns.duration` histogram, unit `s`.
    pub(crate) turns_duration: Histogram<f64>,
    /// `agent_sdk.runs.outcome` counter.
    pub(crate) runs_outcome: Counter<u64>,
    /// `agent_sdk.tools.execution.duration` histogram, unit `ms`.
    pub(crate) tools_execution_duration: Histogram<f64>,
    /// `agent_sdk.tools.execution.count` counter.
    pub(crate) tools_execution_count: Counter<u64>,
    /// `agent_sdk.context.compaction` counter.
    pub(crate) context_compaction: Counter<u64>,
    /// `agent_sdk.context.compaction.tokens_saved` histogram, unit `{token}`.
    pub(crate) context_compaction_tokens_saved: Histogram<u64>,
    /// `agent_sdk.subagent.invocations` counter.
    pub(crate) subagent_invocations: Counter<u64>,
    /// `agent_sdk.mcp.requests.duration` histogram, unit `s`; only present
    /// when the `mcp` feature is enabled.
    #[cfg(feature = "mcp")]
    pub(crate) mcp_requests_duration: Histogram<f64>,
    /// `agent_sdk.llm.retries` counter.
    pub(crate) llm_retries: Counter<u64>,
}
/// Process-wide cache for the shared [`Metrics`] instance.
///
/// Holds `None` until `Metrics::init` stores an instance, and again after
/// `Metrics::reset_for_testing` clears it.
static METRICS: RwLock<Option<Arc<Metrics>>> = RwLock::new(None);
impl Metrics {
    /// Returns the process-wide [`Metrics`] instance, building it on first use.
    ///
    /// `name` is the instrumentation scope handed to the global meter when the
    /// instruments are created. It only matters for the call that actually
    /// builds the instance: once an instance is cached, later calls return it
    /// regardless of the `name` they pass.
    ///
    /// When several threads race on the first call, each may build a
    /// candidate, but every caller is handed the instance that ended up in the
    /// cache rather than its own candidate.
    #[must_use]
    pub fn init(name: &'static str) -> Arc<Self> {
        if let Some(existing) = read_cached() {
            return existing;
        }
        // Build without holding the cache lock.
        let built = Arc::new(Self::build(name));
        write_cached(Arc::clone(&built));
        // `write_cached` keeps an instance another thread installed first, so
        // re-read the cache to return whichever instance actually won. The
        // fallback to `built` only applies if the cache was cleared in between
        // (see `reset_for_testing`).
        read_cached().unwrap_or(built)
    }

    /// Returns the shared instance, initialising it with this crate's package
    /// name (`CARGO_PKG_NAME`) as the instrumentation scope if necessary.
    #[must_use]
    pub fn global() -> Arc<Self> {
        Self::init(env!("CARGO_PKG_NAME"))
    }

    /// Clears the cached instance so the next [`Metrics::init`] or
    /// [`Metrics::global`] call builds a fresh one.
    ///
    /// A poisoned lock is recovered instead of propagating the panic.
    pub fn reset_for_testing() {
        let mut guard = match METRICS.write() {
            Ok(g) => g,
            Err(poisoned) => poisoned.into_inner(),
        };
        *guard = None;
    }

    /// Records chat token usage on the token-usage histogram.
    ///
    /// One data point is recorded per token category (`input`, `output`,
    /// `cache_read`, `cache_creation`); categories whose count is zero are
    /// skipped. Every point carries the operation name `chat`, the provider
    /// name, the token type, and the request and response model names.
    pub fn record_chat_token_usage(
        &self,
        usage: &crate::llm::Usage,
        provider_name: &'static str,
        request_model: &str,
        response_model: &str,
    ) {
        use opentelemetry::KeyValue;
        let entries: [(u32, &'static str); 4] = [
            (usage.input_tokens, "input"),
            (usage.output_tokens, "output"),
            (usage.cached_input_tokens, "cache_read"),
            (usage.cache_creation_input_tokens, "cache_creation"),
        ];
        for (count, token_type) in entries {
            // Zero counts are not recorded at all.
            if count == 0 {
                continue;
            }
            self.token_usage.record(
                u64::from(count),
                &[
                    KeyValue::new(super::attrs::GEN_AI_OPERATION_NAME, "chat"),
                    KeyValue::new(super::attrs::GEN_AI_PROVIDER_NAME, provider_name),
                    KeyValue::new("gen_ai.token.type", token_type),
                    KeyValue::new(
                        super::attrs::GEN_AI_REQUEST_MODEL,
                        request_model.to_string(),
                    ),
                    KeyValue::new(
                        super::attrs::GEN_AI_RESPONSE_MODEL,
                        response_model.to_string(),
                    ),
                ],
            );
        }
    }

    /// Records the duration of a successful chat operation.
    ///
    /// `elapsed_secs` is recorded on the operation-duration histogram with the
    /// operation name `chat`, the provider name, and the request and response
    /// model names as attributes.
    pub fn record_chat_operation_duration_success(
        &self,
        elapsed_secs: f64,
        provider_name: &'static str,
        request_model: &str,
        response_model: &str,
    ) {
        use opentelemetry::KeyValue;
        self.operation_duration.record(
            elapsed_secs,
            &[
                KeyValue::new(super::attrs::GEN_AI_OPERATION_NAME, "chat"),
                KeyValue::new(super::attrs::GEN_AI_PROVIDER_NAME, provider_name),
                KeyValue::new(
                    super::attrs::GEN_AI_REQUEST_MODEL,
                    request_model.to_string(),
                ),
                KeyValue::new(
                    super::attrs::GEN_AI_RESPONSE_MODEL,
                    response_model.to_string(),
                ),
            ],
        );
    }

    /// Records the duration of a failed chat operation.
    ///
    /// Uses the same histogram as the success path, but attaches `error_type`
    /// in place of a response model attribute.
    pub fn record_chat_operation_duration_error(
        &self,
        elapsed_secs: f64,
        provider_name: &'static str,
        request_model: &str,
        error_type: &'static str,
    ) {
        use opentelemetry::KeyValue;
        self.operation_duration.record(
            elapsed_secs,
            &[
                KeyValue::new(super::attrs::GEN_AI_OPERATION_NAME, "chat"),
                KeyValue::new(super::attrs::GEN_AI_PROVIDER_NAME, provider_name),
                KeyValue::new(
                    super::attrs::GEN_AI_REQUEST_MODEL,
                    request_model.to_string(),
                ),
                KeyValue::new(super::attrs::ERROR_TYPE, error_type),
            ],
        );
    }

    /// Records one tool invocation.
    ///
    /// The execution counter is always incremented by one. The duration
    /// histogram is only updated when `duration_ms` is `Some`; the value is
    /// converted with [`tool_duration_ms_to_f64`]. Both instruments receive
    /// the same tool name, kind, and outcome attributes.
    pub fn record_tool_execution(
        &self,
        tool_name: &str,
        tool_kind: &'static str,
        outcome: &'static str,
        duration_ms: Option<u64>,
    ) {
        use opentelemetry::KeyValue;
        let metric_attrs = [
            KeyValue::new(super::attrs::GEN_AI_TOOL_NAME, tool_name.to_string()),
            KeyValue::new(super::attrs::SDK_TOOL_KIND, tool_kind),
            KeyValue::new(super::attrs::SDK_TOOL_OUTCOME, outcome),
        ];
        self.tools_execution_count.add(1, &metric_attrs);
        if let Some(ms) = duration_ms {
            self.tools_execution_duration
                .record(tool_duration_ms_to_f64(ms), &metric_attrs);
        }
    }

    /// Creates every instrument on the global meter for `scope`.
    ///
    /// Histograms get explicit bucket boundaries from the constants at the top
    /// of this file; counters carry only a description.
    fn build(scope: &'static str) -> Self {
        let meter = global::meter(scope);

        // --- gen_ai.* client instruments ---
        let token_usage = meter
            .u64_histogram("gen_ai.client.token.usage")
            .with_unit("{token}")
            .with_description("Number of input and output tokens used.")
            .with_boundaries(TOKEN_USAGE_BUCKETS.to_vec())
            .build();
        let operation_duration = meter
            .f64_histogram("gen_ai.client.operation.duration")
            .with_unit("s")
            .with_description("GenAI operation duration.")
            .with_boundaries(SHORT_DURATION_BUCKETS_S.to_vec())
            .build();
        let time_to_first_chunk = meter
            .f64_histogram("gen_ai.client.operation.time_to_first_chunk")
            .with_unit("s")
            .with_description("Time to first response chunk for streaming GenAI operations.")
            .with_boundaries(SHORT_DURATION_BUCKETS_S.to_vec())
            .build();
        let time_per_output_chunk = meter
            .f64_histogram("gen_ai.client.operation.time_per_output_chunk")
            .with_unit("s")
            .with_description(
                "Time per output chunk after the first chunk for streaming GenAI operations.",
            )
            .with_boundaries(SHORT_DURATION_BUCKETS_S.to_vec())
            .build();

        // --- agent_sdk.* instruments ---
        let turns_duration = meter
            .f64_histogram("agent_sdk.turns.duration")
            .with_unit("s")
            .with_description("Wall-clock duration of a single agent turn.")
            .with_boundaries(SHORT_DURATION_BUCKETS_S.to_vec())
            .build();
        let runs_outcome = meter
            .u64_counter("agent_sdk.runs.outcome")
            .with_description("Count of completed agent runs by outcome.")
            .build();
        let tools_execution_duration = meter
            .f64_histogram("agent_sdk.tools.execution.duration")
            .with_unit("ms")
            .with_description("Duration of a single tool invocation.")
            .with_boundaries(TOOL_DURATION_BUCKETS_MS.to_vec())
            .build();
        let tools_execution_count = meter
            .u64_counter("agent_sdk.tools.execution.count")
            .with_description("Count of tool invocations by name, kind, and outcome.")
            .build();
        let context_compaction = meter
            .u64_counter("agent_sdk.context.compaction")
            .with_description("Count of context-compaction operations by trigger.")
            .build();
        let context_compaction_tokens_saved = meter
            .u64_histogram("agent_sdk.context.compaction.tokens_saved")
            .with_unit("{token}")
            .with_description("Tokens saved by a single compaction operation.")
            .with_boundaries(TOKEN_USAGE_BUCKETS.to_vec())
            .build();
        let subagent_invocations = meter
            .u64_counter("agent_sdk.subagent.invocations")
            .with_description("Count of subagent invocations by agent name and outcome.")
            .build();
        // Only compiled when the `mcp` feature is enabled, matching the field.
        #[cfg(feature = "mcp")]
        let mcp_requests_duration = meter
            .f64_histogram("agent_sdk.mcp.requests.duration")
            .with_unit("s")
            .with_description("Duration of an MCP JSON-RPC client request.")
            .with_boundaries(SHORT_DURATION_BUCKETS_S.to_vec())
            .build();
        let llm_retries = meter
            .u64_counter("agent_sdk.llm.retries")
            .with_description("Count of LLM call retries by provider and error type.")
            .build();

        Self {
            token_usage,
            operation_duration,
            time_to_first_chunk,
            time_per_output_chunk,
            turns_duration,
            runs_outcome,
            tools_execution_duration,
            tools_execution_count,
            context_compaction,
            context_compaction_tokens_saved,
            subagent_invocations,
            #[cfg(feature = "mcp")]
            mcp_requests_duration,
            llm_retries,
        }
    }
}
/// Converts a tool duration in whole milliseconds to the `f64` recorded on the
/// duration histogram.
///
/// Values that fit in a `u32` convert losslessly. Larger values are clamped to
/// `u32::MAX` and a warning is logged.
#[must_use]
pub fn tool_duration_ms_to_f64(ms: u64) -> f64 {
    match u32::try_from(ms) {
        Ok(fits) => f64::from(fits),
        Err(_) => {
            log::warn!("tool duration {ms}ms exceeds u32::MAX; clamping for histogram");
            f64::from(u32::MAX)
        }
    }
}
/// Returns a clone of the cached [`Metrics`] handle, or `None` when the cache
/// is empty.
///
/// A poisoned lock is recovered and read anyway instead of panicking.
fn read_cached() -> Option<Arc<Metrics>> {
    let slot = METRICS
        .read()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    slot.as_ref().cloned()
}
/// Stores `value` in the cache only if the cache is currently empty.
///
/// An instance that is already cached is left untouched and `value` is
/// discarded. A poisoned lock is recovered instead of panicking.
fn write_cached(value: Arc<Metrics>) {
    let mut slot = METRICS
        .write()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    // First writer wins: never replace an instance that is already installed.
    if slot.is_some() {
        return;
    }
    *slot = Some(value);
}