// adk_model/usage_tracking.rs
1//! Stream wrapper that records token usage on the active tracing span.
2//!
3//! Wraps any `LlmResponseStream` and intercepts responses carrying `UsageMetadata`,
4//! recording standardized `gen_ai.usage.*` fields via [`adk_telemetry::record_llm_usage`].
5//!
6//! This is applied once per provider in `generate_content`, so every model gets
7//! consistent telemetry without duplicating recording logic.
8
9use adk_core::{LlmResponse, LlmResponseStream, UsageMetadata};
10use futures::StreamExt;
11use std::pin::Pin;
12use tracing::Span;
13
14/// Wrap an `LlmResponseStream` so that the last `UsageMetadata` seen is recorded
15/// on the provided tracing span when the stream yields it.
16///
17/// The span is entered briefly for each item that carries usage metadata,
18/// ensuring [`adk_telemetry::record_llm_usage`] writes to the correct span
19/// regardless of which span is current when the stream is polled.
20///
21/// For non-streaming (single-response) calls the usage is recorded immediately.
22/// For streaming calls the usage typically arrives on the final chunk, so every
23/// chunk with `usage_metadata` overwrites the span fields (last write wins).
24pub fn with_usage_tracking(stream: LlmResponseStream, span: Span) -> LlmResponseStream {
25 let tracked = stream.map(move |result| {
26 if let Ok(ref response) = result {
27 record_usage_from_response(response, &span);
28 }
29 result
30 });
31 Box::pin(tracked) as Pin<Box<_>>
32}
33
34fn record_usage_from_response(response: &LlmResponse, span: &Span) {
35 if let Some(ref usage) = response.usage_metadata {
36 let _entered = span.enter();
37 record_usage(usage);
38 }
39}
40
/// Map the provider-level `UsageMetadata` counters onto the standardized
/// `adk_telemetry::LlmUsage` shape and record them (the caller enters the
/// target span first, so the fields are written to that span).
fn record_usage(usage: &UsageMetadata) {
    adk_telemetry::record_llm_usage(&adk_telemetry::LlmUsage {
        // Core prompt/completion/total counts.
        input_tokens: usage.prompt_token_count,
        output_tokens: usage.candidates_token_count,
        total_tokens: usage.total_token_count,
        // Cache-related counters — presumably prompt-cache reads vs. cache
        // creation writes; confirm against the provider's metadata docs.
        cache_read_tokens: usage.cache_read_input_token_count,
        cache_creation_tokens: usage.cache_creation_input_token_count,
        // Reasoning/thinking tokens, where the provider reports them.
        thinking_tokens: usage.thinking_token_count,
        // Multimodal audio token counters.
        audio_input_tokens: usage.audio_input_token_count,
        audio_output_tokens: usage.audio_output_token_count,
    });
52}