// adk_model/usage_tracking.rs
//! Stream wrapper that records token usage on the active tracing span.
//!
//! Wraps any `LlmResponseStream` and intercepts responses carrying `UsageMetadata`,
//! recording standardized `gen_ai.usage.*` fields via [`adk_telemetry::record_llm_usage`].
//!
//! This is applied once per provider in `generate_content`, so every model gets
//! consistent telemetry without duplicating recording logic.

use std::pin::Pin;

use adk_core::{LlmResponse, LlmResponseStream, UsageMetadata};
use futures::StreamExt;
use tracing::Span;
13
14/// Wrap an `LlmResponseStream` so that the last `UsageMetadata` seen is recorded
15/// on the provided tracing span when the stream yields it.
16///
17/// The span is entered briefly for each item that carries usage metadata,
18/// ensuring [`adk_telemetry::record_llm_usage`] writes to the correct span
19/// regardless of which span is current when the stream is polled.
20///
21/// For non-streaming (single-response) calls the usage is recorded immediately.
22/// For streaming calls the usage typically arrives on the final chunk, so every
23/// chunk with `usage_metadata` overwrites the span fields (last write wins).
24pub fn with_usage_tracking(stream: LlmResponseStream, span: Span) -> LlmResponseStream {
25    let tracked = stream.map(move |result| {
26        if let Ok(ref response) = result {
27            record_usage_from_response(response, &span);
28        }
29        result
30    });
31    Box::pin(tracked) as Pin<Box<_>>
32}
33
34fn record_usage_from_response(response: &LlmResponse, span: &Span) {
35    if let Some(ref usage) = response.usage_metadata {
36        let _entered = span.enter();
37        record_usage(usage);
38    }
39}
40
41fn record_usage(usage: &UsageMetadata) {
42    adk_telemetry::record_llm_usage(&adk_telemetry::LlmUsage {
43        input_tokens: usage.prompt_token_count,
44        output_tokens: usage.candidates_token_count,
45        total_tokens: usage.total_token_count,
46        cache_read_tokens: usage.cache_read_input_token_count,
47        cache_creation_tokens: usage.cache_creation_input_token_count,
48        thinking_tokens: usage.thinking_token_count,
49        audio_input_tokens: usage.audio_input_token_count,
50        audio_output_tokens: usage.audio_output_token_count,
51    });
52}