Skip to main content

codetether_agent/telemetry/provider/
request.rs

1//! Single provider request record with timing and token counts.
2
3use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5
/// One LLM provider request, with its timing and token counts.
///
/// Push into [`super::ProviderMetrics`] via
/// [`super::ProviderMetrics::record`].
///
/// Note: `prompt_tokens` / `completion_tokens` and `input_tokens` /
/// `output_tokens` carry the same numbers — the duplication exists because
/// different downstream consumers were written against different field names.
/// The type does not keep the two pairs in sync: all four are independent
/// public fields, so whoever builds a record must fill both pairs with
/// matching values. [`ProviderRequestRecord::tokens_per_second`] reads
/// `output_tokens` only.
///
/// # Examples
///
/// ```rust
/// use codetether_agent::telemetry::ProviderRequestRecord;
/// use chrono::Utc;
///
/// let r = ProviderRequestRecord {
///     provider: "anthropic".into(),
///     model: "claude-sonnet-4".into(),
///     timestamp: Utc::now(),
///     prompt_tokens: 1_000,
///     completion_tokens: 500,
///     input_tokens: 1_000,
///     output_tokens: 500,
///     latency_ms: 2_000,
///     ttft_ms: Some(300),
///     success: true,
/// };
/// // 500 output tokens over 2 seconds.
/// assert!((r.tokens_per_second() - 250.0).abs() < 1e-6);
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProviderRequestRecord {
    /// Provider name (e.g. `"anthropic"`, `"openai"`).
    pub provider: String,
    /// Model id (e.g. `"claude-sonnet-4"`).
    pub model: String,
    /// UTC timestamp of the request. Whether this marks the start or the
    /// completion of the request is a caller convention; callers are
    /// expected to pick one and apply it consistently.
    pub timestamp: DateTime<Utc>,
    /// Input tokens (provider's `prompt_tokens` field).
    pub prompt_tokens: u64,
    /// Output tokens (provider's `completion_tokens` field).
    pub completion_tokens: u64,
    /// Duplicate of `prompt_tokens` under the wire-format name.
    pub input_tokens: u64,
    /// Duplicate of `completion_tokens` under the wire-format name. This is
    /// the field the throughput calculation uses.
    pub output_tokens: u64,
    /// End-to-end latency in milliseconds.
    pub latency_ms: u64,
    /// Time-to-first-token in milliseconds, when the provider streamed;
    /// `None` when no such measurement was recorded.
    pub ttft_ms: Option<u64>,
    /// `true` iff the provider returned a non-error response.
    pub success: bool,
}
56
57impl ProviderRequestRecord {
58    /// Output tokens per second over the full request latency. Returns `0.0`
59    /// when `latency_ms` is zero, never panics.
60    pub fn tokens_per_second(&self) -> f64 {
61        if self.latency_ms == 0 {
62            return 0.0;
63        }
64        (self.output_tokens as f64) / (self.latency_ms as f64 / 1000.0)
65    }
66}