Skip to main content

harn_vm/llm/api/
telemetry.rs

1//! Per-call provider telemetry envelope.
2//!
3//! Local runtimes (Ollama in particular) report enough server-side timing
4//! information to diagnose slow runs without scraping daemon logs: cold load
5//! vs steady state, prefill vs generation, and tokens-per-second ratios. The
6//! Anthropic and OpenAI hosted APIs don't expose comparable metrics, but the
7//! local runtimes Harn cares most about (Ollama, llama.cpp, MLX, vLLM) all
8//! ship at least a partial subset. This module normalizes whatever the
9//! provider reports into one stable envelope and represents missing fields
10//! as `None` so downstream evals can distinguish "not reported" from "zero".
11//!
12//! Conventions:
13//! - Durations are milliseconds (Ollama reports nanoseconds; we convert).
14//! - Token counts are signed `i64` to match the rest of `LlmResult`.
15//! - `source` identifies which wire format the values were lifted from so
16//!   eval scripts can route on it without re-deriving provider names.
17
18use std::collections::BTreeMap;
19use std::rc::Rc;
20
21use crate::value::VmValue;
22
/// Canonical strings stored in `ProviderTelemetry::source`, one per wire
/// format the values can be lifted from. These must stay aligned with the
/// matching strings in `docs/src/observability/*` and with the Burin eval
/// aggregator.
pub mod source {
    /// NDJSON stream from Ollama's `/api/chat`; carries the full timing
    /// breakdown.
    pub const OLLAMA_CHAT: &str = "ollama_chat";
    /// Ollama's raw `/api/generate` endpoint; carries the full timing
    /// breakdown.
    pub const OLLAMA_GENERATE: &str = "ollama_generate";
    /// An OpenAI-style `usage` block: prompt/completion token counts and
    /// optional cache details. Server-side timings only appear when the
    /// runtime extends the schema.
    pub const OPENAI_USAGE: &str = "openai_usage";
    /// The llama.cpp `usage.timings` extension (`prompt_ms`,
    /// `predicted_ms`, `predicted_n`, `prompt_n`, ...), kept verbatim from
    /// the non-OpenAI subset.
    pub const LLAMACPP_TIMINGS: &str = "llamacpp_timings";
    /// Anthropic Messages API: usage counts without any timings.
    pub const ANTHROPIC_USAGE: &str = "anthropic_usage";
    /// The `usageMetadata` block of a Google Gemini `generateContent`
    /// response.
    pub const GEMINI_USAGE: &str = "gemini_usage";
    /// The provider answered, but nothing was captured beyond what
    /// `LlmResult` already holds (mock / fake providers, or a stream that
    /// ended without a usage frame).
    pub const UNKNOWN: &str = "unknown";
}
48
/// Whole milliseconds elapsed since `started`, clamped to `u64::MAX` if the
/// 128-bit millisecond count does not fit in a `u64`.
pub(crate) fn elapsed_ms(started: std::time::Instant) -> u64 {
    let millis: u128 = started.elapsed().as_millis();
    if millis > u128::from(u64::MAX) {
        u64::MAX
    } else {
        millis as u64
    }
}
52
53/// Provider-side timing and runtime accounting captured per LLM call.
54///
55/// All fields default to `None` / empty. Producers fill in what they can
56/// extract and leave the rest absent; consumers must treat missing fields as
57/// "not reported by this provider", not "zero".
58#[derive(Clone, Debug, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
59pub struct ProviderTelemetry {
60    /// Wire format the values came from (`ollama_chat`, `openai_usage`, ...).
61    /// See [`source`] for the canonical strings. Empty when no telemetry was
62    /// captured.
63    #[serde(default, skip_serializing_if = "String::is_empty")]
64    pub source: String,
65    /// Total server-side wall clock (Ollama `total_duration`).
66    #[serde(skip_serializing_if = "Option::is_none")]
67    pub server_total_ms: Option<u64>,
68    /// Time the server spent loading/warming the model (Ollama
69    /// `load_duration`). Useful for detecting cold-start latency.
70    #[serde(skip_serializing_if = "Option::is_none")]
71    pub server_load_ms: Option<u64>,
72    /// Prompt-prefill time (Ollama `prompt_eval_duration`). Anything else
73    /// would be marketing — this is the field evals key on for prefill
74    /// regression detection.
75    #[serde(skip_serializing_if = "Option::is_none")]
76    pub server_prompt_eval_ms: Option<u64>,
77    /// Generation/decode time (Ollama `eval_duration`).
78    #[serde(skip_serializing_if = "Option::is_none")]
79    pub server_generation_ms: Option<u64>,
80    /// Tokens the server reports it prefilled (Ollama `prompt_eval_count`).
81    /// Distinct from `LlmResult::input_tokens` because hosted providers
82    /// frequently bill different token boundaries than the on-device
83    /// tokenizer reports.
84    #[serde(skip_serializing_if = "Option::is_none")]
85    pub server_prompt_tokens: Option<i64>,
86    /// Tokens the server reports it generated (Ollama `eval_count`).
87    #[serde(skip_serializing_if = "Option::is_none")]
88    pub server_output_tokens: Option<i64>,
89    /// Client-side wall clock around the HTTP request. Includes network and
90    /// streaming latency the server-side counters omit. Recorded for every
91    /// call regardless of provider.
92    #[serde(skip_serializing_if = "Option::is_none")]
93    pub client_wall_ms: Option<u64>,
94    /// Context window the model was loaded with (where the runtime
95    /// reports it; `/api/ps` for Ollama).
96    #[serde(skip_serializing_if = "Option::is_none")]
97    pub runtime_context_length: Option<u64>,
98    /// Exact model id the server resolved. May differ from
99    /// `LlmResult::model` when an alias / digest is rewritten upstream.
100    #[serde(skip_serializing_if = "Option::is_none")]
101    pub runtime_loaded_model: Option<String>,
102    /// Total resident bytes for the loaded model (Ollama `/api/ps.size`).
103    #[serde(skip_serializing_if = "Option::is_none")]
104    pub runtime_memory_bytes: Option<u64>,
105    /// VRAM bytes for the loaded model (Ollama `/api/ps.size_vram`).
106    #[serde(skip_serializing_if = "Option::is_none")]
107    pub runtime_memory_vram_bytes: Option<u64>,
108    /// When the server will unload the model (Ollama `/api/ps.expires_at`).
109    #[serde(skip_serializing_if = "Option::is_none")]
110    pub runtime_keep_alive_until: Option<String>,
111    /// Provider-supplied request id (`x-request-id` / `request_id`).
112    #[serde(skip_serializing_if = "Option::is_none")]
113    pub request_id: Option<String>,
114}
115
116impl ProviderTelemetry {
117    pub fn new(source: &str) -> Self {
118        Self {
119            source: source.to_string(),
120            ..Self::default()
121        }
122    }
123
124    /// Returns `true` when no meaningful telemetry was captured. A bare
125    /// `client_wall_ms` is still "meaningful" — it lets evals reason about
126    /// per-call latency even for providers that report nothing else.
127    pub fn is_empty(&self) -> bool {
128        let Self {
129            source,
130            server_total_ms,
131            server_load_ms,
132            server_prompt_eval_ms,
133            server_generation_ms,
134            server_prompt_tokens,
135            server_output_tokens,
136            client_wall_ms,
137            runtime_context_length,
138            runtime_loaded_model,
139            runtime_memory_bytes,
140            runtime_memory_vram_bytes,
141            runtime_keep_alive_until,
142            request_id,
143        } = self;
144        source.is_empty()
145            && server_total_ms.is_none()
146            && server_load_ms.is_none()
147            && server_prompt_eval_ms.is_none()
148            && server_generation_ms.is_none()
149            && server_prompt_tokens.is_none()
150            && server_output_tokens.is_none()
151            && client_wall_ms.is_none()
152            && runtime_context_length.is_none()
153            && runtime_loaded_model.is_none()
154            && runtime_memory_bytes.is_none()
155            && runtime_memory_vram_bytes.is_none()
156            && runtime_keep_alive_until.is_none()
157            && request_id.is_none()
158    }
159
160    /// Convert nanoseconds (Ollama's reporting unit) to milliseconds with
161    /// integer rounding. Centralized so every Ollama timing field uses the
162    /// same conversion and zero-vs-None semantics line up.
163    pub fn ns_to_ms(ns: u64) -> u64 {
164        // Use floor division (the conversion is approximate by design); when
165        // the upstream reports 0 ns we want a 0 ms entry rather than None,
166        // so callers should pass through `Some(ns_to_ms(0))` consciously.
167        ns / 1_000_000
168    }
169
170    /// Extract Ollama-shape telemetry from a `done=true` chat or generate
171    /// frame. Returns a populated [`ProviderTelemetry`] whose `source` is
172    /// the caller-provided wire identifier.
173    pub fn from_ollama_done(frame: &serde_json::Value, source: &str) -> Self {
174        let mut telemetry = Self::new(source);
175        telemetry.server_total_ms = ns_field(frame, "total_duration");
176        telemetry.server_load_ms = ns_field(frame, "load_duration");
177        telemetry.server_prompt_eval_ms = ns_field(frame, "prompt_eval_duration");
178        telemetry.server_generation_ms = ns_field(frame, "eval_duration");
179        telemetry.server_prompt_tokens = frame
180            .get("prompt_eval_count")
181            .and_then(serde_json::Value::as_i64);
182        telemetry.server_output_tokens =
183            frame.get("eval_count").and_then(serde_json::Value::as_i64);
184        if let Some(model) = frame.get("model").and_then(serde_json::Value::as_str) {
185            telemetry.runtime_loaded_model = Some(model.to_string());
186        }
187        telemetry
188    }
189
190    /// Extract OpenAI-shape `usage` telemetry. Most OpenAI-compatible local
191    /// runtimes only report counts; llama.cpp's `usage.timings` extension is
192    /// folded in here as well so a single envelope captures both shapes.
193    pub fn from_openai_usage(usage: &serde_json::Value, request_id: Option<&str>) -> Self {
194        let mut telemetry = Self::new(source::OPENAI_USAGE);
195        telemetry.server_prompt_tokens = usage
196            .get("prompt_tokens")
197            .or_else(|| usage.get("input_tokens"))
198            .and_then(serde_json::Value::as_i64);
199        telemetry.server_output_tokens = usage
200            .get("completion_tokens")
201            .or_else(|| usage.get("output_tokens"))
202            .and_then(serde_json::Value::as_i64);
203        if let Some(timings) = usage.get("timings").filter(|value| value.is_object()) {
204            telemetry.source = source::LLAMACPP_TIMINGS.to_string();
205            telemetry.server_prompt_eval_ms = ms_or_round(timings.get("prompt_ms"));
206            telemetry.server_generation_ms = ms_or_round(timings.get("predicted_ms"));
207            // llama.cpp also reports `prompt_n` / `predicted_n` here when
208            // its usage breakdown is enabled; prefer them over the
209            // legacy top-level counts so prefill cache hits surface
210            // correctly.
211            if let Some(prefill) = timings.get("prompt_n").and_then(serde_json::Value::as_i64) {
212                telemetry.server_prompt_tokens = Some(prefill);
213            }
214            if let Some(predicted) = timings
215                .get("predicted_n")
216                .and_then(serde_json::Value::as_i64)
217            {
218                telemetry.server_output_tokens = Some(predicted);
219            }
220            let total = telemetry
221                .server_prompt_eval_ms
222                .unwrap_or(0)
223                .saturating_add(telemetry.server_generation_ms.unwrap_or(0));
224            if total > 0 {
225                telemetry.server_total_ms = Some(total);
226            }
227        }
228        if let Some(request_id) = request_id.filter(|value| !value.is_empty()) {
229            telemetry.request_id = Some(request_id.to_string());
230        }
231        telemetry
232    }
233
234    /// Extract Anthropic-shape `usage` telemetry. Anthropic only reports
235    /// input/output (and cache) counts — preserving the request id is the
236    /// most useful incremental signal beyond what `LlmResult` already
237    /// carries.
238    pub fn from_anthropic_usage(usage: &serde_json::Value, request_id: Option<&str>) -> Self {
239        let mut telemetry = Self::new(source::ANTHROPIC_USAGE);
240        telemetry.server_prompt_tokens = usage
241            .get("input_tokens")
242            .and_then(serde_json::Value::as_i64);
243        telemetry.server_output_tokens = usage
244            .get("output_tokens")
245            .and_then(serde_json::Value::as_i64);
246        if let Some(request_id) = request_id.filter(|value| !value.is_empty()) {
247            telemetry.request_id = Some(request_id.to_string());
248        }
249        telemetry
250    }
251
252    /// Extract Gemini `usageMetadata` counts. Cache-hit accounting stays on
253    /// `LlmResult`; telemetry keeps provider prompt/output counters plus the
254    /// request id for eval joins.
255    pub fn from_gemini_usage(usage: &serde_json::Value, request_id: Option<&str>) -> Self {
256        let mut telemetry = Self::new(source::GEMINI_USAGE);
257        telemetry.server_prompt_tokens = usage
258            .get("promptTokenCount")
259            .and_then(serde_json::Value::as_i64);
260        telemetry.server_output_tokens = usage
261            .get("candidatesTokenCount")
262            .and_then(serde_json::Value::as_i64);
263        if let Some(request_id) = request_id.filter(|value| !value.is_empty()) {
264            telemetry.request_id = Some(request_id.to_string());
265        }
266        telemetry
267    }
268
269    /// Merge a `/api/ps` snapshot of a loaded Ollama model into this
270    /// telemetry envelope. Only fills in fields that were not already
271    /// populated so a per-call extraction keeps precedence.
272    pub fn merge_ollama_ps(&mut self, ps: &OllamaPsModel) {
273        if self.runtime_loaded_model.is_none() {
274            self.runtime_loaded_model = ps.name.clone();
275        }
276        if self.runtime_context_length.is_none() {
277            self.runtime_context_length = ps.context_length;
278        }
279        if self.runtime_memory_bytes.is_none() {
280            self.runtime_memory_bytes = ps.size_bytes;
281        }
282        if self.runtime_memory_vram_bytes.is_none() {
283            self.runtime_memory_vram_bytes = ps.size_vram_bytes;
284        }
285        if self.runtime_keep_alive_until.is_none() {
286            self.runtime_keep_alive_until = ps.expires_at.clone();
287        }
288    }
289
290    /// Render this envelope into the dictionary shape `llm_call` returns.
291    /// Returns `None` if the envelope is empty so callers can omit the key
292    /// entirely.
293    pub fn as_vm_dict(&self) -> Option<VmValue> {
294        if self.is_empty() {
295            return None;
296        }
297        let mut dict: BTreeMap<String, VmValue> = BTreeMap::new();
298        if !self.source.is_empty() {
299            dict.insert(
300                "source".to_string(),
301                VmValue::String(Rc::from(self.source.as_str())),
302            );
303        }
304        insert_opt_u64(&mut dict, "server_total_ms", self.server_total_ms);
305        insert_opt_u64(&mut dict, "server_load_ms", self.server_load_ms);
306        insert_opt_u64(
307            &mut dict,
308            "server_prompt_eval_ms",
309            self.server_prompt_eval_ms,
310        );
311        insert_opt_u64(&mut dict, "server_generation_ms", self.server_generation_ms);
312        insert_opt_i64(&mut dict, "server_prompt_tokens", self.server_prompt_tokens);
313        insert_opt_i64(&mut dict, "server_output_tokens", self.server_output_tokens);
314        insert_opt_u64(&mut dict, "client_wall_ms", self.client_wall_ms);
315        insert_opt_u64(
316            &mut dict,
317            "runtime_context_length",
318            self.runtime_context_length,
319        );
320        if let Some(ref model) = self.runtime_loaded_model {
321            dict.insert(
322                "runtime_loaded_model".to_string(),
323                VmValue::String(Rc::from(model.as_str())),
324            );
325        }
326        insert_opt_u64(&mut dict, "runtime_memory_bytes", self.runtime_memory_bytes);
327        insert_opt_u64(
328            &mut dict,
329            "runtime_memory_vram_bytes",
330            self.runtime_memory_vram_bytes,
331        );
332        if let Some(ref expires) = self.runtime_keep_alive_until {
333            dict.insert(
334                "runtime_keep_alive_until".to_string(),
335                VmValue::String(Rc::from(expires.as_str())),
336            );
337        }
338        if let Some(ref request_id) = self.request_id {
339            dict.insert(
340                "request_id".to_string(),
341                VmValue::String(Rc::from(request_id.as_str())),
342            );
343        }
344        Some(VmValue::Dict(Rc::new(dict)))
345    }
346}
347
/// One loaded model as described by Ollama's `/api/ps`. The CLI's
/// `harn local` family reuses this type so the response shape is defined
/// only once.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct OllamaPsModel {
    /// Identifier of the loaded model.
    pub name: Option<String>,
    /// Total size of the loaded model in bytes (`size`).
    pub size_bytes: Option<u64>,
    /// Size of the loaded model held in VRAM, in bytes (`size_vram`).
    pub size_vram_bytes: Option<u64>,
    /// Value of `expires_at`, stored as the string the server sent.
    pub expires_at: Option<String>,
    /// Context window reported for the loaded model.
    pub context_length: Option<u64>,
}
358
359impl OllamaPsModel {
360    /// Decode one `/api/ps` `models[]` entry. Returns `None` when the entry
361    /// has no usable identifier (an old daemon may emit completely empty
362    /// rows under load).
363    pub fn from_ps_entry(entry: &serde_json::Value) -> Option<Self> {
364        let name = entry
365            .get("name")
366            .and_then(serde_json::Value::as_str)
367            .or_else(|| entry.get("model").and_then(serde_json::Value::as_str))
368            .map(str::to_string);
369        let context_length = entry
370            .get("context_length")
371            .and_then(serde_json::Value::as_u64)
372            .or_else(|| {
373                entry
374                    .get("details")
375                    .and_then(|details| details.get("context_length"))
376                    .and_then(serde_json::Value::as_u64)
377            });
378        Some(Self {
379            name,
380            size_bytes: entry.get("size").and_then(serde_json::Value::as_u64),
381            size_vram_bytes: entry.get("size_vram").and_then(serde_json::Value::as_u64),
382            expires_at: entry
383                .get("expires_at")
384                .and_then(serde_json::Value::as_str)
385                .map(str::to_string),
386            context_length,
387        })
388    }
389}
390
391fn ns_field(frame: &serde_json::Value, key: &str) -> Option<u64> {
392    frame
393        .get(key)
394        .and_then(serde_json::Value::as_u64)
395        .map(ProviderTelemetry::ns_to_ms)
396}
397
398fn ms_or_round(value: Option<&serde_json::Value>) -> Option<u64> {
399    let value = value?;
400    if let Some(n) = value.as_u64() {
401        return Some(n);
402    }
403    value.as_f64().map(|n| n.round().max(0.0) as u64)
404}
405
406fn insert_opt_u64(dict: &mut BTreeMap<String, VmValue>, key: &str, value: Option<u64>) {
407    if let Some(value) = value {
408        dict.insert(key.to_string(), VmValue::Int(value as i64));
409    }
410}
411
412fn insert_opt_i64(dict: &mut BTreeMap<String, VmValue>, key: &str, value: Option<i64>) {
413    if let Some(value) = value {
414        dict.insert(key.to_string(), VmValue::Int(value));
415    }
416}
417
#[cfg(test)]
mod tests {
    use super::*;

    /// Model tag shared by the Ollama-flavoured fixtures below.
    const MODEL_TAG: &str = "qwen3.6:35b-a3b-coding-nvfp4";

    /// A complete `done` frame yields every duration (in ms), both token
    /// counts, and the resolved model name.
    #[test]
    fn ollama_done_frame_extracts_full_breakdown() {
        let done_frame = serde_json::json!({
            "model": MODEL_TAG,
            "total_duration": 7_400_000_000u64,
            "load_duration": 400_000_000u64,
            "prompt_eval_duration": 1_200_000_000u64,
            "eval_duration": 5_800_000_000u64,
            "prompt_eval_count": 1024,
            "eval_count": 64
        });

        let parsed = ProviderTelemetry::from_ollama_done(&done_frame, source::OLLAMA_CHAT);

        assert_eq!(parsed.source, source::OLLAMA_CHAT);
        assert_eq!(parsed.server_total_ms, Some(7400));
        assert_eq!(parsed.server_load_ms, Some(400));
        assert_eq!(parsed.server_prompt_eval_ms, Some(1200));
        assert_eq!(parsed.server_generation_ms, Some(5800));
        assert_eq!(parsed.server_prompt_tokens, Some(1024));
        assert_eq!(parsed.server_output_tokens, Some(64));
        assert_eq!(parsed.runtime_loaded_model.as_deref(), Some(MODEL_TAG));
        assert!(!parsed.is_empty());
    }

    /// Fields missing from the frame stay `None` instead of becoming zero.
    #[test]
    fn ollama_done_frame_leaves_missing_fields_as_none() {
        // Older Ollama builds omit duration fields on early frames.
        let sparse_frame = serde_json::json!({ "model": MODEL_TAG });

        let parsed = ProviderTelemetry::from_ollama_done(&sparse_frame, source::OLLAMA_CHAT);

        assert!(parsed.server_total_ms.is_none());
        assert!(parsed.server_load_ms.is_none());
        assert!(parsed.server_prompt_eval_ms.is_none());
        assert!(parsed.server_generation_ms.is_none());
        assert!(parsed.server_prompt_tokens.is_none());
        assert!(parsed.server_output_tokens.is_none());
    }

    /// A plain OpenAI `usage` block yields counts and the request id only.
    #[test]
    fn openai_usage_partial_extracts_counts_only() {
        let usage_block = serde_json::json!({
            "prompt_tokens": 200,
            "completion_tokens": 50
        });

        let parsed = ProviderTelemetry::from_openai_usage(&usage_block, Some("req-abc"));

        assert_eq!(parsed.source, source::OPENAI_USAGE);
        assert_eq!(parsed.server_prompt_tokens, Some(200));
        assert_eq!(parsed.server_output_tokens, Some(50));
        assert!(parsed.server_prompt_eval_ms.is_none());
        assert_eq!(parsed.request_id.as_deref(), Some("req-abc"));
    }

    /// A `timings` object switches the source to llama.cpp, fills the
    /// durations, and overrides the top-level token counts.
    #[test]
    fn llamacpp_timings_promotes_source_and_fills_durations() {
        let usage_block = serde_json::json!({
            "prompt_tokens": 220,
            "completion_tokens": 17,
            "timings": {
                "prompt_n": 200,
                "prompt_ms": 145.4,
                "predicted_n": 17,
                "predicted_ms": 89.1,
            }
        });

        let parsed = ProviderTelemetry::from_openai_usage(&usage_block, None);

        assert_eq!(parsed.source, source::LLAMACPP_TIMINGS);
        assert_eq!(parsed.server_prompt_eval_ms, Some(145));
        assert_eq!(parsed.server_generation_ms, Some(89));
        assert_eq!(parsed.server_total_ms, Some(234));
        assert_eq!(parsed.server_prompt_tokens, Some(200));
        assert_eq!(parsed.server_output_tokens, Some(17));
        assert!(!parsed.is_empty());
    }

    /// `context_length` is accepted both at the top level and nested under
    /// `details`.
    #[test]
    fn ps_entry_pulls_context_length_from_top_level_or_details() {
        let top_level_entry = serde_json::json!({
            "name": MODEL_TAG,
            "size": 4_700_000_000u64,
            "size_vram": 4_500_000_000u64,
            "expires_at": "2026-05-14T10:30:00Z",
            "context_length": 32768
        });
        let top_level = OllamaPsModel::from_ps_entry(&top_level_entry).expect("ps entry parses");
        assert_eq!(top_level.context_length, Some(32768));

        let nested_entry = serde_json::json!({
            "name": MODEL_TAG,
            "details": {"context_length": 16384}
        });
        let nested = OllamaPsModel::from_ps_entry(&nested_entry).expect("ps entry parses");
        assert_eq!(nested.context_length, Some(16384));
    }

    /// Merging a `/api/ps` snapshot fills the gaps but never overwrites a
    /// value extracted from the call itself.
    #[test]
    fn merge_ollama_ps_preserves_call_level_values() {
        let mut envelope = ProviderTelemetry::new(source::OLLAMA_CHAT);
        envelope.runtime_loaded_model = Some(String::from("real-model"));

        let snapshot = OllamaPsModel {
            name: Some(String::from("alias-model")),
            size_bytes: Some(1),
            size_vram_bytes: Some(2),
            expires_at: Some(String::from("forever")),
            context_length: Some(8192),
        };
        envelope.merge_ollama_ps(&snapshot);

        assert_eq!(envelope.runtime_loaded_model.as_deref(), Some("real-model"));
        assert_eq!(envelope.runtime_memory_bytes, Some(1));
        assert_eq!(envelope.runtime_memory_vram_bytes, Some(2));
        assert_eq!(envelope.runtime_keep_alive_until.as_deref(), Some("forever"));
        assert_eq!(envelope.runtime_context_length, Some(8192));
    }

    /// A default envelope is empty and renders to no dictionary at all.
    #[test]
    fn as_vm_dict_returns_none_when_empty() {
        let blank = ProviderTelemetry::default();
        assert!(blank.is_empty());
        assert!(blank.as_vm_dict().is_none());
    }

    /// Populated fields show up in the rendered dictionary with the
    /// expected values.
    #[test]
    fn as_vm_dict_serializes_all_present_fields() {
        let envelope = ProviderTelemetry {
            source: source::OLLAMA_CHAT.to_string(),
            server_total_ms: Some(100),
            client_wall_ms: Some(120),
            runtime_loaded_model: Some("qwen".to_string()),
            ..Default::default()
        };
        let rendered = envelope.as_vm_dict().expect("dict present");
        let dict = rendered.as_dict().expect("dict body");

        // Integer lookup that yields `None` for missing or non-int entries.
        let int_at = |key: &str| match dict.get(key) {
            Some(VmValue::Int(n)) => Some(*n),
            _ => None,
        };

        let source_text = dict.get("source").map(VmValue::display);
        assert_eq!(source_text.as_deref(), Some(source::OLLAMA_CHAT));
        assert_eq!(int_at("server_total_ms"), Some(100));
        assert_eq!(int_at("client_wall_ms"), Some(120));
    }
}