codetether_agent/telemetry/provider/request.rs
1//! Single provider request record with timing and token counts.
2
3use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5
6/// One LLM provider request. Push into [`super::ProviderMetrics`] via
7/// [`super::ProviderMetrics::record`].
8///
9/// Note: `prompt_tokens` / `completion_tokens` and `input_tokens` /
10/// `output_tokens` carry the same numbers — the duplication exists because
11/// different downstream consumers were written against different field names.
12///
13/// # Examples
14///
15/// ```rust
16/// use codetether_agent::telemetry::ProviderRequestRecord;
17/// use chrono::Utc;
18///
19/// let r = ProviderRequestRecord {
20/// provider: "anthropic".into(),
21/// model: "claude-sonnet-4".into(),
22/// timestamp: Utc::now(),
23/// prompt_tokens: 1_000,
24/// completion_tokens: 500,
25/// input_tokens: 1_000,
26/// output_tokens: 500,
27/// latency_ms: 2_000,
28/// ttft_ms: Some(300),
29/// success: true,
30/// };
31/// assert!((r.tokens_per_second() - 250.0).abs() < 1e-6);
32/// ```
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct ProviderRequestRecord {
35 /// Provider name (e.g. `"anthropic"`, `"openai"`).
36 pub provider: String,
37 /// Model id (e.g. `"claude-sonnet-4"`).
38 pub model: String,
39 /// When the request started (or completed — callers are consistent).
40 pub timestamp: DateTime<Utc>,
41 /// Input tokens (provider's `prompt_tokens` field).
42 pub prompt_tokens: u64,
43 /// Output tokens (provider's `completion_tokens` field).
44 pub completion_tokens: u64,
45 /// Duplicate of `prompt_tokens` under the wire-format name.
46 pub input_tokens: u64,
47 /// Duplicate of `completion_tokens` under the wire-format name.
48 pub output_tokens: u64,
49 /// End-to-end latency in milliseconds.
50 pub latency_ms: u64,
51 /// Time-to-first-token in milliseconds, when the provider streamed.
52 pub ttft_ms: Option<u64>,
53 /// `true` iff the provider returned a non-error response.
54 pub success: bool,
55}
56
57impl ProviderRequestRecord {
58 /// Output tokens per second over the full request latency. Returns `0.0`
59 /// when `latency_ms` is zero, never panics.
60 pub fn tokens_per_second(&self) -> f64 {
61 if self.latency_ms == 0 {
62 return 0.0;
63 }
64 (self.output_tokens as f64) / (self.latency_ms as f64 / 1000.0)
65 }
66}