use crate::error::{ReactError, Result};
use opentelemetry::KeyValue;
use opentelemetry::metrics::{Counter, Histogram, MeterProvider};
use opentelemetry::trace::TracerProvider as _;
use opentelemetry_otlp::WithExportConfig;
use opentelemetry_sdk::metrics::{PeriodicReader, SdkMeterProvider};
use opentelemetry_sdk::trace::TracerProvider;
use std::sync::OnceLock;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
/// Settings consumed by the telemetry bootstrap functions in this module.
#[derive(Clone, Debug)]
pub struct TelemetryConfig {
    /// OTLP endpoint handed to both the span exporter and the metric exporter.
    pub otlp_endpoint: String,
    /// Value reported as the `service.name` resource attribute.
    pub service_name: String,
    /// When `true`, a formatted console layer is added to the tracing subscriber.
    pub enable_console: bool,
}

impl Default for TelemetryConfig {
    /// Targets a collector on `http://localhost:4317`, names the service
    /// `echo-agent`, and keeps console output switched on.
    fn default() -> Self {
        let otlp_endpoint = String::from("http://localhost:4317");
        let service_name = String::from("echo-agent");
        TelemetryConfig {
            otlp_endpoint,
            service_name,
            enable_console: true,
        }
    }
}
/// Process-wide metric instruments: written once by `init_metrics` and read
/// through `Metrics::get`. Stays empty if metrics were never initialized.
static METRICS: OnceLock<Metrics> = OnceLock::new();
/// Bundle of the OpenTelemetry instruments this module records into.
///
/// The instruments are created in `init_metrics`; callers use the associated
/// `record_*` functions rather than touching the fields directly.
#[derive(Clone, Debug)]
pub struct Metrics {
    /// Counter registered as `llm.calls`.
    llm_calls: Counter<u64>,
    /// Counter registered as `llm.tokens`.
    llm_tokens: Counter<u64>,
    /// Histogram registered as `llm.latency` (unit `ms`).
    llm_latency: Histogram<f64>,
    /// Counter registered as `tool.executions`.
    tool_executions: Counter<u64>,
    /// Histogram registered as `tool.latency` (unit `ms`).
    tool_latency: Histogram<f64>,
}
impl Metrics {
    /// Returns the globally stored instruments, or `None` when nothing has
    /// been stored in `METRICS` yet.
    pub fn get() -> Option<&'static Metrics> {
        METRICS.get()
    }

    /// Adds one to the LLM call counter, tagged with `provider`, `model` and
    /// `status`. Does nothing when metrics are not initialized.
    pub fn record_llm_call(provider: &str, model: &str, status: &str) {
        let Some(instruments) = Self::get() else {
            return;
        };
        let attributes = [
            KeyValue::new("provider", provider.to_owned()),
            KeyValue::new("model", model.to_owned()),
            KeyValue::new("status", status.to_owned()),
        ];
        instruments.llm_calls.add(1, &attributes);
    }

    /// Adds `count` to the LLM token counter, tagged with `provider`, `model`
    /// and the token `type`. Does nothing when metrics are not initialized.
    pub fn record_llm_tokens(provider: &str, model: &str, token_type: &str, count: u64) {
        let Some(instruments) = Self::get() else {
            return;
        };
        let attributes = [
            KeyValue::new("provider", provider.to_owned()),
            KeyValue::new("model", model.to_owned()),
            KeyValue::new("type", token_type.to_owned()),
        ];
        instruments.llm_tokens.add(count, &attributes);
    }

    /// Records one LLM latency sample (`duration_ms`), tagged with `provider`
    /// and `model`. Does nothing when metrics are not initialized.
    pub fn record_llm_latency(provider: &str, model: &str, duration_ms: f64) {
        let Some(instruments) = Self::get() else {
            return;
        };
        let attributes = [
            KeyValue::new("provider", provider.to_owned()),
            KeyValue::new("model", model.to_owned()),
        ];
        instruments.llm_latency.record(duration_ms, &attributes);
    }

    /// Adds one to the tool execution counter, tagged with `tool` and
    /// `status`. Does nothing when metrics are not initialized.
    pub fn record_tool_execution(tool: &str, status: &str) {
        let Some(instruments) = Self::get() else {
            return;
        };
        let attributes = [
            KeyValue::new("tool", tool.to_owned()),
            KeyValue::new("status", status.to_owned()),
        ];
        instruments.tool_executions.add(1, &attributes);
    }

    /// Records one tool latency sample (`duration_ms`), tagged with `tool`.
    /// Does nothing when metrics are not initialized.
    pub fn record_tool_latency(tool: &str, duration_ms: f64) {
        let Some(instruments) = Self::get() else {
            return;
        };
        let attributes = [KeyValue::new("tool", tool.to_owned())];
        instruments.tool_latency.record(duration_ms, &attributes);
    }
}
pub fn init_telemetry(config: TelemetryConfig) -> Result<()> {
let exporter = opentelemetry_otlp::SpanExporter::builder()
.with_tonic()
.with_endpoint(&config.otlp_endpoint)
.build()
.map_err(|e| ReactError::Other(format!("OTLP exporter error: {e}")))?;
let provider = TracerProvider::builder()
.with_batch_exporter(exporter, opentelemetry_sdk::runtime::Tokio)
.with_resource(opentelemetry_sdk::Resource::new(vec![KeyValue::new(
"service.name",
config.service_name.clone(),
)]))
.build();
let tracer = provider.tracer("echo-agent");
let otel_layer = tracing_opentelemetry::layer().with_tracer(tracer);
let env_filter = tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"));
if config.enable_console {
let fmt_layer = tracing_subscriber::fmt::layer()
.without_time()
.with_target(false);
tracing_subscriber::registry()
.with(env_filter)
.with(otel_layer)
.with(fmt_layer)
.init();
} else {
tracing_subscriber::registry()
.with(env_filter)
.with(otel_layer)
.init();
}
match init_metrics(&config) {
Ok(()) => {
tracing::info!(endpoint = %config.otlp_endpoint, "Metrics initialized");
}
Err(e) => {
tracing::warn!(error = %e, "Metrics initialization failed (non-fatal), continuing without metrics");
}
}
Ok(())
}
fn init_metrics(config: &TelemetryConfig) -> Result<()> {
let metrics_exporter = opentelemetry_otlp::MetricExporter::builder()
.with_tonic()
.with_endpoint(&config.otlp_endpoint)
.build()
.map_err(|e| ReactError::Other(format!("OTLP metrics exporter error: {e}")))?;
let reader =
PeriodicReader::builder(metrics_exporter, opentelemetry_sdk::runtime::Tokio).build();
let meter_provider = SdkMeterProvider::builder()
.with_reader(reader)
.with_resource(opentelemetry_sdk::Resource::new(vec![KeyValue::new(
"service.name",
config.service_name.clone(),
)]))
.build();
let meter = meter_provider.meter("echo-agent");
let metrics = Metrics {
llm_calls: meter
.u64_counter("llm.calls")
.with_description("Number of LLM API calls")
.build(),
llm_tokens: meter
.u64_counter("llm.tokens")
.with_description("Number of tokens consumed")
.with_unit("tokens")
.build(),
llm_latency: meter
.f64_histogram("llm.latency")
.with_description("LLM API call latency in milliseconds")
.with_unit("ms")
.build(),
tool_executions: meter
.u64_counter("tool.executions")
.with_description("Number of tool executions")
.build(),
tool_latency: meter
.f64_histogram("tool.latency")
.with_description("Tool execution latency in milliseconds")
.with_unit("ms")
.build(),
};
METRICS
.set(metrics)
.map_err(|_| ReactError::Other("Metrics already initialized".to_string()))?;
Ok(())
}
pub fn shutdown_telemetry() {
opentelemetry::global::shutdown_tracer_provider();
}