Skip to main content

llmkit_tower/
tracing.rs

1//! Tracing layer: a structured span per call with latency and token counts.
2
3use std::sync::Arc;
4use std::time::Instant;
5
6use async_trait::async_trait;
7use llmkit_core::{
8    ChatRequest, ChatResponse, ChatStream, CostEstimate, EmbedRequest, EmbedResponse, LlmProvider,
9    LlmResult,
10};
11
12use crate::layer::LlmLayer;
13
/// Layer that wraps an LLM provider so every call is reported through `tracing`.
///
/// The layer is a stateless unit type: it carries no configuration and is free to copy.
/// Applying it to a provider yields a [`Tracing`] wrapper.
#[derive(Clone, Copy, Debug, Default)]
pub struct TracingLayer;
17
18impl TracingLayer {
19    /// Construct the layer.
20    pub fn new() -> Self {
21        Self
22    }
23}
24
25impl LlmLayer for TracingLayer {
26    type Provider = Tracing;
27    fn layer(self, inner: Arc<dyn LlmProvider>) -> Tracing {
28        Tracing { inner }
29    }
30}
31
32/// Provider produced by [`TracingLayer`].
33pub struct Tracing {
34    inner: Arc<dyn LlmProvider>,
35}
36
37#[async_trait]
38impl LlmProvider for Tracing {
39    async fn chat(&self, req: ChatRequest) -> LlmResult<ChatResponse> {
40        let span = tracing::info_span!("llm.chat", provider = self.inner.name(), model = self.inner.model());
41        let _e = span.enter();
42        let start = Instant::now();
43
44        let result = self.inner.chat(req).await;
45        let latency_ms = start.elapsed().as_millis() as u64;
46
47        match &result {
48            Ok(resp) => tracing::info!(
49                latency_ms,
50                prompt_tokens = resp.usage.prompt,
51                completion_tokens = resp.usage.completion,
52                "chat completed"
53            ),
54            Err(e) => tracing::warn!(latency_ms, error = %e, "chat failed"),
55        }
56        result
57    }
58
59    async fn chat_stream(&self, req: ChatRequest) -> LlmResult<ChatStream> {
60        let span = tracing::info_span!("llm.chat_stream", provider = self.inner.name(), model = self.inner.model());
61        let _e = span.enter();
62        tracing::info!("stream opened");
63        self.inner.chat_stream(req).await
64    }
65
66    async fn embed(&self, req: EmbedRequest) -> LlmResult<EmbedResponse> {
67        let span = tracing::info_span!("llm.embed", provider = self.inner.name());
68        let _e = span.enter();
69        let start = Instant::now();
70        let result = self.inner.embed(req).await;
71        let latency_ms = start.elapsed().as_millis() as u64;
72        match &result {
73            Ok(resp) => tracing::info!(latency_ms, count = resp.embeddings.len(), "embed completed"),
74            Err(e) => tracing::warn!(latency_ms, error = %e, "embed failed"),
75        }
76        result
77    }
78
79    fn name(&self) -> &'static str {
80        self.inner.name()
81    }
82
83    fn model(&self) -> &str {
84        self.inner.model()
85    }
86
87    fn estimate_cost(&self, req: &ChatRequest) -> Option<CostEstimate> {
88        self.inner.estimate_cost(req)
89    }
90}