1use std::any::type_name;
9use std::error::Error;
10use std::time::Instant;
11
12use opentelemetry::global;
13use opentelemetry::metrics::Histogram;
14use opentelemetry::trace::{Span, SpanKind, Status, Tracer};
15use opentelemetry::{Array, KeyValue, StringValue, Value};
16use std::sync::OnceLock;
17
/// Instrumentation scope name passed to `global::tracer` and `global::meter`
/// when this module creates spans and histograms.
pub const TELEMETRY_INSTRUMENTATION_NAME: &str = "gestalt.provider";
/// Value of `gen_ai.provider.name` on agent-invocation and tool-execution
/// operations.
pub const GENAI_PROVIDER_NAME: &str = "gestalt";

/// `gen_ai.operation.name` value used by [`model_operation`].
pub const GENAI_OPERATION_CHAT: &str = "chat";
/// `gen_ai.operation.name` value used by [`tool_execution`].
pub const GENAI_OPERATION_EXECUTE_TOOL: &str = "execute_tool";
/// `gen_ai.operation.name` value used by [`agent_invocation`].
pub const GENAI_OPERATION_INVOKE_AGENT: &str = "invoke_agent";

/// `gen_ai.tool.type` value for datastore tools. Not referenced elsewhere in
/// this part of the file; presumably supplied by callers through
/// `ToolExecutionOptions::with_tool_type` (NOTE(review): confirm).
pub const GENAI_TOOL_TYPE_DATASTORE: &str = "datastore";
/// `gen_ai.tool.type` value for extension tools; the default tool type of
/// `ToolExecutionOptions::new` and the fallback in [`tool_execution`].
pub const GENAI_TOOL_TYPE_EXTENSION: &str = "extension";
34
// Name of the histogram that receives the elapsed time of every operation.
const OPERATION_DURATION_METRIC: &str = "gen_ai.client.operation.duration";
// Name of the histogram that receives input/output token counts.
const TOKEN_USAGE_METRIC: &str = "gen_ai.client.token.usage";
// Explicit bucket boundaries for the duration histogram (unit "s"); each
// boundary is double the previous one, from 0.01 up to 81.92.
const OPERATION_DURATION_BUCKETS: &[f64] = &[
    0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92,
];
// Explicit bucket boundaries for the token histogram; each boundary is four
// times the previous one, from 1 up to 67108864.
const TOKEN_USAGE_BUCKETS: &[f64] = &[
    1.0, 4.0, 16.0, 64.0, 256.0, 1024.0, 4096.0, 16384.0, 65536.0, 262144.0, 1048576.0, 4194304.0,
    16777216.0, 67108864.0,
];
44
// Histogram instruments are created once, on first use, by
// `operation_duration()` and `token_usage()` and then shared for the rest of
// the process lifetime.
static OPERATION_DURATION: OnceLock<Histogram<f64>> = OnceLock::new();
static TOKEN_USAGE: OnceLock<Histogram<u64>> = OnceLock::new();
47
48#[derive(Clone, Debug, Default)]
50pub struct ModelOperationOptions {
51 pub provider_name: String,
53 pub request_model: String,
55 pub request_options: RequestOptions,
57 pub request_attributes: Vec<KeyValue>,
59}
60
61impl ModelOperationOptions {
62 pub fn new(provider_name: impl Into<String>, request_model: impl Into<String>) -> Self {
64 Self {
65 provider_name: provider_name.into(),
66 request_model: request_model.into(),
67 ..Self::default()
68 }
69 }
70
71 pub fn with_request_options(mut self, request_options: RequestOptions) -> Self {
73 self.request_options = request_options;
74 self
75 }
76
77 pub fn with_request_attribute(mut self, attribute: KeyValue) -> Self {
79 self.request_attributes.push(attribute);
80 self
81 }
82}
83
/// Optional request parameters of a model call.
///
/// Every field that is `Some` is turned into a `gen_ai.request.*` span
/// attribute by `request_option_attributes`; floating-point values that are
/// not finite are skipped there.
#[derive(Clone, Debug, Default)]
pub struct RequestOptions {
    /// Emitted as `gen_ai.request.choice.count`.
    pub choice_count: Option<i64>,
    /// Emitted as `gen_ai.request.frequency_penalty`.
    pub frequency_penalty: Option<f64>,
    /// Emitted as `gen_ai.request.max_tokens`.
    pub max_tokens: Option<i64>,
    /// Emitted as `gen_ai.request.presence_penalty`.
    pub presence_penalty: Option<f64>,
    /// Emitted as `gen_ai.request.seed`.
    pub seed: Option<i64>,
    /// Emitted as `gen_ai.request.temperature`.
    pub temperature: Option<f64>,
    /// Emitted as `gen_ai.request.top_k`.
    pub top_k: Option<i64>,
    /// Emitted as `gen_ai.request.top_p`.
    pub top_p: Option<f64>,
}
104
/// Inputs for starting an agent invocation with [`agent_invocation`].
#[derive(Clone, Debug, Default)]
pub struct AgentInvocationOptions {
    /// Agent name; emitted as `gen_ai.agent.name` and used in the span name.
    pub agent_name: String,
    /// Session identifier; emitted on the span as `gen_ai.conversation.id`.
    pub session_id: String,
    /// Turn identifier; emitted on the span as `gestalt.agent.turn_id`.
    pub turn_id: String,
    /// Model used by the agent; emitted as `gen_ai.request.model`.
    pub model: String,
}

impl AgentInvocationOptions {
    /// Creates options from anything convertible into owned strings.
    pub fn new(
        agent_name: impl Into<String>,
        session_id: impl Into<String>,
        turn_id: impl Into<String>,
        model: impl Into<String>,
    ) -> Self {
        let agent_name = agent_name.into();
        let session_id = session_id.into();
        let turn_id = turn_id.into();
        let model = model.into();
        Self {
            agent_name,
            session_id,
            turn_id,
            model,
        }
    }
}
134
135#[derive(Clone, Debug, Default)]
137pub struct ToolExecutionOptions {
138 pub tool_name: String,
140 pub tool_call_id: String,
142 pub tool_type: String,
144}
145
146impl ToolExecutionOptions {
147 pub fn new(tool_name: impl Into<String>) -> Self {
149 Self {
150 tool_name: tool_name.into(),
151 tool_type: GENAI_TOOL_TYPE_EXTENSION.to_string(),
152 ..Self::default()
153 }
154 }
155
156 pub fn with_tool_call_id(mut self, tool_call_id: impl Into<String>) -> Self {
158 self.tool_call_id = tool_call_id.into();
159 self
160 }
161
162 pub fn with_tool_type(mut self, tool_type: impl Into<String>) -> Self {
164 self.tool_type = tool_type.into();
165 self
166 }
167}
168
/// Token counts reported for one operation, consumed by
/// `GenAIOperation::record_usage`.
///
/// Every `Some` field becomes a `gen_ai.usage.*` span attribute; only
/// `input_tokens` and `output_tokens` are additionally recorded on the token
/// usage histogram.
#[derive(Clone, Debug, Default)]
pub struct TokenUsage {
    /// Emitted as `gen_ai.usage.input_tokens`.
    pub input_tokens: Option<u64>,
    /// Emitted as `gen_ai.usage.output_tokens`.
    pub output_tokens: Option<u64>,
    /// Emitted as `gen_ai.usage.cache_creation.input_tokens`.
    pub cache_creation_input_tokens: Option<u64>,
    /// Emitted as `gen_ai.usage.cache_read.input_tokens`.
    pub cache_read_input_tokens: Option<u64>,
    /// Emitted as `gen_ai.usage.reasoning.output_tokens`.
    pub reasoning_output_tokens: Option<u64>,
}
183
184#[derive(Debug)]
186pub struct GenAIOperation {
187 span: global::BoxedSpan,
188 started_at: Instant,
189 metric_attributes: Vec<KeyValue>,
190 error_type: Option<String>,
191 ended: bool,
192}
193
194impl GenAIOperation {
195 pub fn end(&mut self) {
197 if self.ended {
198 return;
199 }
200 self.ended = true;
201
202 let mut attributes = self.metric_attributes.clone();
203 if let Some(error_type) = self.error_type.clone() {
204 append_or_replace(
205 &mut attributes,
206 KeyValue::new("error.type", error_type.to_string()),
207 );
208 }
209 operation_duration().record(self.started_at.elapsed().as_secs_f64(), &attributes);
210 self.span.end();
211 }
212
213 pub fn mark_error(&mut self, error_type: impl Into<String>, description: impl Into<String>) {
215 let error_type = clean_string(error_type.into()).unwrap_or_else(|| "_OTHER".to_string());
216 self.error_type = Some(error_type.clone());
217 append_or_replace(
218 &mut self.metric_attributes,
219 KeyValue::new("error.type", error_type.clone()),
220 );
221 self.span
222 .set_attribute(KeyValue::new("error.type", error_type.clone()));
223 self.span.set_status(Status::error(description.into()));
224 }
225
226 pub fn record_error<E>(&mut self, err: &E)
228 where
229 E: Error + 'static,
230 {
231 self.mark_error(type_name::<E>(), err.to_string());
232 self.span.record_error(err);
233 }
234
235 pub fn set_attribute(&mut self, attribute: KeyValue) {
237 if attribute.key.as_str() == "gen_ai.response.model" {
238 append_or_replace(&mut self.metric_attributes, attribute.clone());
239 }
240 self.span.set_attribute(attribute);
241 }
242
243 pub fn set_response_metadata(
245 &mut self,
246 response_id: Option<&str>,
247 response_model: Option<&str>,
248 finish_reasons: &[&str],
249 ) {
250 if let Some(response_id) = clean_string(response_id.unwrap_or_default()) {
251 self.set_attribute(KeyValue::new("gen_ai.response.id", response_id));
252 }
253 if let Some(response_model) = clean_string(response_model.unwrap_or_default()) {
254 self.set_attribute(KeyValue::new("gen_ai.response.model", response_model));
255 }
256 let finish_reasons = finish_reasons
257 .iter()
258 .filter_map(|reason| clean_string(*reason).map(StringValue::from))
259 .collect::<Vec<_>>();
260 if !finish_reasons.is_empty() {
261 self.set_attribute(KeyValue::new(
262 "gen_ai.response.finish_reasons",
263 Value::Array(Array::String(finish_reasons)),
264 ));
265 }
266 }
267
268 pub fn record_usage(&mut self, usage: TokenUsage) {
270 self.set_u64_attribute("gen_ai.usage.input_tokens", usage.input_tokens);
271 self.set_u64_attribute("gen_ai.usage.output_tokens", usage.output_tokens);
272 self.set_u64_attribute(
273 "gen_ai.usage.cache_creation.input_tokens",
274 usage.cache_creation_input_tokens,
275 );
276 self.set_u64_attribute(
277 "gen_ai.usage.cache_read.input_tokens",
278 usage.cache_read_input_tokens,
279 );
280 self.set_u64_attribute(
281 "gen_ai.usage.reasoning.output_tokens",
282 usage.reasoning_output_tokens,
283 );
284
285 self.record_token_usage(usage.input_tokens, "input");
286 self.record_token_usage(usage.output_tokens, "output");
287 }
288
289 fn set_u64_attribute(&mut self, key: &'static str, value: Option<u64>) {
290 let Some(value) = value else {
291 return;
292 };
293 if value <= i64::MAX as u64 {
294 self.set_attribute(KeyValue::new(key, value as i64));
295 }
296 }
297
298 fn record_token_usage(&self, tokens: Option<u64>, token_type: &'static str) {
299 let Some(tokens) = tokens else {
300 return;
301 };
302 let mut attributes = self.metric_attributes.clone();
303 append_or_replace(
304 &mut attributes,
305 KeyValue::new("gen_ai.token.type", token_type),
306 );
307 token_usage().record(tokens, &attributes);
308 }
309}
310
/// Ends the operation when the handle goes out of scope.
impl Drop for GenAIOperation {
    fn drop(&mut self) {
        // `end` returns early when it has already run, so an explicit `end()`
        // followed by the drop records the operation only once.
        self.end();
    }
}
316
317pub fn model_operation(options: ModelOperationOptions) -> GenAIOperation {
319 let provider_name = clean_string(options.provider_name).unwrap_or_else(|| "_OTHER".to_string());
320 let request_model = clean_string(options.request_model).unwrap_or_default();
321 let metric_attributes = vec![
322 KeyValue::new("gen_ai.operation.name", GENAI_OPERATION_CHAT),
323 KeyValue::new("gen_ai.provider.name", provider_name),
324 KeyValue::new("gen_ai.request.model", request_model.clone()),
325 ];
326 let mut span_attributes = metric_attributes.clone();
327 span_attributes.extend(request_option_attributes(options.request_options));
328 span_attributes.extend(options.request_attributes);
329
330 start_operation(
331 span_name(GENAI_OPERATION_CHAT, &request_model),
332 SpanKind::Client,
333 span_attributes,
334 metric_attributes,
335 )
336}
337
338pub fn agent_invocation(options: AgentInvocationOptions) -> GenAIOperation {
340 let agent_name = clean_string(options.agent_name).unwrap_or_else(|| "provider".to_string());
341 let model = clean_string(options.model).unwrap_or_default();
342 let span_attributes = vec![
343 KeyValue::new("gen_ai.operation.name", GENAI_OPERATION_INVOKE_AGENT),
344 KeyValue::new("gen_ai.provider.name", GENAI_PROVIDER_NAME),
345 KeyValue::new("gen_ai.agent.name", agent_name.clone()),
346 KeyValue::new(
347 "gen_ai.conversation.id",
348 clean_string(options.session_id).unwrap_or_default(),
349 ),
350 KeyValue::new("gen_ai.request.model", model.clone()),
351 KeyValue::new(
352 "gestalt.agent.turn_id",
353 clean_string(options.turn_id).unwrap_or_default(),
354 ),
355 ];
356 let metric_attributes = vec![
357 KeyValue::new("gen_ai.operation.name", GENAI_OPERATION_INVOKE_AGENT),
358 KeyValue::new("gen_ai.provider.name", GENAI_PROVIDER_NAME),
359 KeyValue::new("gen_ai.agent.name", agent_name.clone()),
360 KeyValue::new("gen_ai.request.model", model),
361 ];
362
363 start_operation(
364 span_name(GENAI_OPERATION_INVOKE_AGENT, &agent_name),
365 SpanKind::Internal,
366 span_attributes,
367 metric_attributes,
368 )
369}
370
371pub fn tool_execution(options: ToolExecutionOptions) -> GenAIOperation {
373 let tool_name = clean_string(options.tool_name).unwrap_or_else(|| "_OTHER".to_string());
374 let tool_type =
375 clean_string(options.tool_type).unwrap_or_else(|| GENAI_TOOL_TYPE_EXTENSION.to_string());
376 let span_attributes = vec![
377 KeyValue::new("gen_ai.operation.name", GENAI_OPERATION_EXECUTE_TOOL),
378 KeyValue::new("gen_ai.provider.name", GENAI_PROVIDER_NAME),
379 KeyValue::new("gen_ai.tool.name", tool_name.clone()),
380 KeyValue::new(
381 "gen_ai.tool.call.id",
382 clean_string(options.tool_call_id).unwrap_or_default(),
383 ),
384 KeyValue::new("gen_ai.tool.type", tool_type.clone()),
385 ];
386 let metric_attributes = vec![
387 KeyValue::new("gen_ai.operation.name", GENAI_OPERATION_EXECUTE_TOOL),
388 KeyValue::new("gen_ai.provider.name", GENAI_PROVIDER_NAME),
389 KeyValue::new("gen_ai.tool.name", tool_name.clone()),
390 KeyValue::new("gen_ai.tool.type", tool_type),
391 ];
392
393 start_operation(
394 span_name(GENAI_OPERATION_EXECUTE_TOOL, &tool_name),
395 SpanKind::Internal,
396 span_attributes,
397 metric_attributes,
398 )
399}
400
401fn start_operation(
402 name: String,
403 kind: SpanKind,
404 span_attributes: Vec<KeyValue>,
405 metric_attributes: Vec<KeyValue>,
406) -> GenAIOperation {
407 let tracer = global::tracer(TELEMETRY_INSTRUMENTATION_NAME);
408 let span = tracer
409 .span_builder(name)
410 .with_kind(kind)
411 .with_attributes(span_attributes)
412 .start(&tracer);
413
414 GenAIOperation {
415 span,
416 started_at: Instant::now(),
417 metric_attributes,
418 error_type: None,
419 ended: false,
420 }
421}
422
423fn operation_duration() -> &'static Histogram<f64> {
424 OPERATION_DURATION.get_or_init(|| {
425 global::meter(TELEMETRY_INSTRUMENTATION_NAME)
426 .f64_histogram(OPERATION_DURATION_METRIC)
427 .with_unit("s")
428 .with_description("GenAI operation duration.")
429 .with_boundaries(OPERATION_DURATION_BUCKETS.to_vec())
430 .build()
431 })
432}
433
434fn token_usage() -> &'static Histogram<u64> {
435 TOKEN_USAGE.get_or_init(|| {
436 global::meter(TELEMETRY_INSTRUMENTATION_NAME)
437 .u64_histogram(TOKEN_USAGE_METRIC)
438 .with_unit("{token}")
439 .with_description("Number of input and output tokens used.")
440 .with_boundaries(TOKEN_USAGE_BUCKETS.to_vec())
441 .build()
442 })
443}
444
445fn request_option_attributes(options: RequestOptions) -> Vec<KeyValue> {
446 let mut attributes = Vec::new();
447 push_i64(
448 &mut attributes,
449 "gen_ai.request.choice.count",
450 options.choice_count,
451 );
452 push_f64(
453 &mut attributes,
454 "gen_ai.request.frequency_penalty",
455 options.frequency_penalty,
456 );
457 push_i64(
458 &mut attributes,
459 "gen_ai.request.max_tokens",
460 options.max_tokens,
461 );
462 push_f64(
463 &mut attributes,
464 "gen_ai.request.presence_penalty",
465 options.presence_penalty,
466 );
467 push_i64(&mut attributes, "gen_ai.request.seed", options.seed);
468 push_f64(
469 &mut attributes,
470 "gen_ai.request.temperature",
471 options.temperature,
472 );
473 push_i64(&mut attributes, "gen_ai.request.top_k", options.top_k);
474 push_f64(&mut attributes, "gen_ai.request.top_p", options.top_p);
475 attributes
476}
477
478fn push_i64(attributes: &mut Vec<KeyValue>, key: &'static str, value: Option<i64>) {
479 if let Some(value) = value {
480 attributes.push(KeyValue::new(key, value));
481 }
482}
483
484fn push_f64(attributes: &mut Vec<KeyValue>, key: &'static str, value: Option<f64>) {
485 if let Some(value) = value.filter(|value| value.is_finite()) {
486 attributes.push(KeyValue::new(key, value));
487 }
488}
489
490fn append_or_replace(attributes: &mut Vec<KeyValue>, attribute: KeyValue) {
491 if let Some(existing) = attributes
492 .iter_mut()
493 .find(|existing| existing.key.as_str() == attribute.key.as_str())
494 {
495 *existing = attribute;
496 } else {
497 attributes.push(attribute);
498 }
499}
500
/// Builds a span name of the form `"<operation> <subject>"`, or just the
/// operation when the subject is empty after trimming whitespace.
fn span_name(operation: &'static str, subject: &str) -> String {
    match subject.trim() {
        "" => operation.to_string(),
        trimmed => format!("{operation} {trimmed}"),
    }
}
509
/// Trims surrounding whitespace from `value` and returns the result, or
/// `None` when nothing is left.
fn clean_string(value: impl Into<String>) -> Option<String> {
    let raw: String = value.into();
    let trimmed = raw.trim();
    (!trimmed.is_empty()).then(|| trimmed.to_string())
}
514
#[cfg(test)]
mod tests {
    use super::*;
    use std::fmt;

    // Minimal error type whose type name is checked by
    // `record_error_uses_concrete_error_type`.
    #[derive(Debug)]
    struct CustomTelemetryError;

    impl fmt::Display for CustomTelemetryError {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            write!(f, "custom telemetry error")
        }
    }

    impl Error for CustomTelemetryError {}

    // Request options belong on the span only; the seed must not show up in
    // the attributes used for metrics.
    #[test]
    fn model_metric_attributes_exclude_request_options() {
        let operation = model_operation(
            ModelOperationOptions::new("openai", "gpt-4.1").with_request_options(RequestOptions {
                seed: Some(123),
                temperature: Some(0.2),
                ..RequestOptions::default()
            }),
        );

        assert!(
            !operation
                .metric_attributes
                .iter()
                .any(|attr| attr.key.as_str() == "gen_ai.request.seed")
        );
    }

    // Smoke test: drives all three operation kinds end to end. This test
    // installs no tracer or meter provider and makes no assertions, so it
    // passes as long as none of the calls panic.
    #[test]
    fn operations_record_without_configured_sdk() {
        let mut operation = model_operation(ModelOperationOptions::new("openai", "gpt-4.1"));
        operation.set_response_metadata(Some("resp-123"), Some("gpt-4.1"), &["stop"]);
        operation.record_usage(TokenUsage {
            input_tokens: Some(12),
            output_tokens: Some(34),
            ..TokenUsage::default()
        });
        operation.end();

        let mut agent = agent_invocation(AgentInvocationOptions::new(
            "simple",
            "session-123",
            "turn-123",
            "claude-opus-4-1",
        ));
        agent.mark_error("agent_error", "agent failed");
        agent.end();

        let mut tool = tool_execution(
            ToolExecutionOptions::new("github.search").with_tool_call_id("call-123"),
        );
        tool.mark_error("tool_error", "tool failed");
        tool.end();
    }

    // `record_error` derives the error type from the concrete Rust type of
    // the error. The expected string is the full path of the test error type,
    // so it depends on this module's location within the crate.
    #[test]
    fn record_error_uses_concrete_error_type() {
        let mut operation = model_operation(ModelOperationOptions::new("openai", "gpt-4.1"));
        let err = CustomTelemetryError;

        operation.record_error(&err);

        assert_eq!(
            operation.error_type.as_deref(),
            Some("gestalt::telemetry::tests::CustomTelemetryError")
        );
    }
}