1use std::collections::BTreeMap;
19use std::rc::Rc;
20
21use crate::value::VmValue;
22
/// String tags stored in `ProviderTelemetry::source` to record which provider
/// payload a telemetry record was built from.
pub mod source {
    /// Tag passed by callers to `ProviderTelemetry::from_ollama_done`
    /// (presumably for frames from Ollama's chat endpoint; confirm with callers).
    pub const OLLAMA_CHAT: &str = "ollama_chat";
    /// Companion tag to `OLLAMA_CHAT`, presumably for Ollama's generate
    /// endpoint. None of the constructors in this file assign it themselves.
    pub const OLLAMA_GENERATE: &str = "ollama_generate";
    /// Tag assigned by `ProviderTelemetry::from_openai_usage` when the usage
    /// object carries no `timings` object.
    pub const OPENAI_USAGE: &str = "openai_usage";
    /// Tag that `ProviderTelemetry::from_openai_usage` switches to when the
    /// usage object contains a `timings` object.
    pub const LLAMACPP_TIMINGS: &str = "llamacpp_timings";
    /// Tag assigned by `ProviderTelemetry::from_anthropic_usage`.
    pub const ANTHROPIC_USAGE: &str = "anthropic_usage";
    /// Tag assigned by `ProviderTelemetry::from_gemini_usage`.
    pub const GEMINI_USAGE: &str = "gemini_usage";
    /// Fallback tag. None of the constructors in this file assign it;
    /// presumably used by callers that cannot identify the provider.
    pub const UNKNOWN: &str = "unknown";
}
48
/// Returns the whole milliseconds elapsed since `started`.
///
/// The underlying count is a `u128`; anything that does not fit in a `u64`
/// is reported as `u64::MAX` instead of wrapping.
pub(crate) fn elapsed_ms(started: std::time::Instant) -> u64 {
    let millis: u128 = started.elapsed().as_millis();
    if millis > u128::from(u64::MAX) {
        u64::MAX
    } else {
        // Lossless: the branch above guarantees the value fits in 64 bits.
        millis as u64
    }
}
52
#[derive(Clone, Debug, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
/// Timing, token-count and runtime details collected for one provider call.
///
/// Every measurement is optional; `None` means the provider payload did not
/// supply it. Unset optional fields (and an empty `source`) are left out of
/// the serialized form.
pub struct ProviderTelemetry {
    #[serde(default, skip_serializing_if = "String::is_empty")]
    /// Tag naming the payload this record was built from (the constructors in
    /// this file use the `source::*` constants). Empty in a default record.
    pub source: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// Total server-side time in milliseconds. Taken from Ollama's
    /// `total_duration`, or computed as prompt-eval plus generation time when
    /// llama.cpp `timings` are present.
    pub server_total_ms: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// Milliseconds derived from Ollama's `load_duration`.
    pub server_load_ms: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// Prompt-evaluation time in milliseconds, from Ollama's
    /// `prompt_eval_duration` or llama.cpp's `timings.prompt_ms`.
    pub server_prompt_eval_ms: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// Generation time in milliseconds, from Ollama's `eval_duration` or
    /// llama.cpp's `timings.predicted_ms`.
    pub server_generation_ms: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// Prompt/input token count as reported by the provider
    /// (`prompt_eval_count`, `prompt_tokens`/`input_tokens`,
    /// `promptTokenCount`, or llama.cpp's `timings.prompt_n`).
    pub server_prompt_tokens: Option<i64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// Output token count as reported by the provider (`eval_count`,
    /// `completion_tokens`/`output_tokens`, `candidatesTokenCount`, or
    /// llama.cpp's `timings.predicted_n`).
    pub server_output_tokens: Option<i64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// Not populated by any constructor in this file; presumably the
    /// client-measured wall-clock time in milliseconds (see `elapsed_ms`);
    /// confirm with callers.
    pub client_wall_ms: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// Context length of the loaded model, filled by `merge_ollama_ps`.
    pub runtime_context_length: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// Model name from the Ollama frame's `model` field, or from the `ps`
    /// entry when the frame did not provide one.
    pub runtime_loaded_model: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// Value of the `ps` entry's `size` field, filled by `merge_ollama_ps`.
    pub runtime_memory_bytes: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// Value of the `ps` entry's `size_vram` field, filled by `merge_ollama_ps`.
    pub runtime_memory_vram_bytes: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// Value of the `ps` entry's `expires_at` string, filled by
    /// `merge_ollama_ps`; kept verbatim, never parsed here.
    pub runtime_keep_alive_until: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    /// Provider request identifier; set by the usage constructors when a
    /// non-empty id is supplied.
    pub request_id: Option<String>,
}
115
116impl ProviderTelemetry {
117 pub fn new(source: &str) -> Self {
118 Self {
119 source: source.to_string(),
120 ..Self::default()
121 }
122 }
123
124 pub fn is_empty(&self) -> bool {
128 let Self {
129 source,
130 server_total_ms,
131 server_load_ms,
132 server_prompt_eval_ms,
133 server_generation_ms,
134 server_prompt_tokens,
135 server_output_tokens,
136 client_wall_ms,
137 runtime_context_length,
138 runtime_loaded_model,
139 runtime_memory_bytes,
140 runtime_memory_vram_bytes,
141 runtime_keep_alive_until,
142 request_id,
143 } = self;
144 source.is_empty()
145 && server_total_ms.is_none()
146 && server_load_ms.is_none()
147 && server_prompt_eval_ms.is_none()
148 && server_generation_ms.is_none()
149 && server_prompt_tokens.is_none()
150 && server_output_tokens.is_none()
151 && client_wall_ms.is_none()
152 && runtime_context_length.is_none()
153 && runtime_loaded_model.is_none()
154 && runtime_memory_bytes.is_none()
155 && runtime_memory_vram_bytes.is_none()
156 && runtime_keep_alive_until.is_none()
157 && request_id.is_none()
158 }
159
160 pub fn ns_to_ms(ns: u64) -> u64 {
164 ns / 1_000_000
168 }
169
170 pub fn from_ollama_done(frame: &serde_json::Value, source: &str) -> Self {
174 let mut telemetry = Self::new(source);
175 telemetry.server_total_ms = ns_field(frame, "total_duration");
176 telemetry.server_load_ms = ns_field(frame, "load_duration");
177 telemetry.server_prompt_eval_ms = ns_field(frame, "prompt_eval_duration");
178 telemetry.server_generation_ms = ns_field(frame, "eval_duration");
179 telemetry.server_prompt_tokens = frame
180 .get("prompt_eval_count")
181 .and_then(serde_json::Value::as_i64);
182 telemetry.server_output_tokens =
183 frame.get("eval_count").and_then(serde_json::Value::as_i64);
184 if let Some(model) = frame.get("model").and_then(serde_json::Value::as_str) {
185 telemetry.runtime_loaded_model = Some(model.to_string());
186 }
187 telemetry
188 }
189
190 pub fn from_openai_usage(usage: &serde_json::Value, request_id: Option<&str>) -> Self {
194 let mut telemetry = Self::new(source::OPENAI_USAGE);
195 telemetry.server_prompt_tokens = usage
196 .get("prompt_tokens")
197 .or_else(|| usage.get("input_tokens"))
198 .and_then(serde_json::Value::as_i64);
199 telemetry.server_output_tokens = usage
200 .get("completion_tokens")
201 .or_else(|| usage.get("output_tokens"))
202 .and_then(serde_json::Value::as_i64);
203 if let Some(timings) = usage.get("timings").filter(|value| value.is_object()) {
204 telemetry.source = source::LLAMACPP_TIMINGS.to_string();
205 telemetry.server_prompt_eval_ms = ms_or_round(timings.get("prompt_ms"));
206 telemetry.server_generation_ms = ms_or_round(timings.get("predicted_ms"));
207 if let Some(prefill) = timings.get("prompt_n").and_then(serde_json::Value::as_i64) {
212 telemetry.server_prompt_tokens = Some(prefill);
213 }
214 if let Some(predicted) = timings
215 .get("predicted_n")
216 .and_then(serde_json::Value::as_i64)
217 {
218 telemetry.server_output_tokens = Some(predicted);
219 }
220 let total = telemetry
221 .server_prompt_eval_ms
222 .unwrap_or(0)
223 .saturating_add(telemetry.server_generation_ms.unwrap_or(0));
224 if total > 0 {
225 telemetry.server_total_ms = Some(total);
226 }
227 }
228 if let Some(request_id) = request_id.filter(|value| !value.is_empty()) {
229 telemetry.request_id = Some(request_id.to_string());
230 }
231 telemetry
232 }
233
234 pub fn from_anthropic_usage(usage: &serde_json::Value, request_id: Option<&str>) -> Self {
239 let mut telemetry = Self::new(source::ANTHROPIC_USAGE);
240 telemetry.server_prompt_tokens = usage
241 .get("input_tokens")
242 .and_then(serde_json::Value::as_i64);
243 telemetry.server_output_tokens = usage
244 .get("output_tokens")
245 .and_then(serde_json::Value::as_i64);
246 if let Some(request_id) = request_id.filter(|value| !value.is_empty()) {
247 telemetry.request_id = Some(request_id.to_string());
248 }
249 telemetry
250 }
251
252 pub fn from_gemini_usage(usage: &serde_json::Value, request_id: Option<&str>) -> Self {
256 let mut telemetry = Self::new(source::GEMINI_USAGE);
257 telemetry.server_prompt_tokens = usage
258 .get("promptTokenCount")
259 .and_then(serde_json::Value::as_i64);
260 telemetry.server_output_tokens = usage
261 .get("candidatesTokenCount")
262 .and_then(serde_json::Value::as_i64);
263 if let Some(request_id) = request_id.filter(|value| !value.is_empty()) {
264 telemetry.request_id = Some(request_id.to_string());
265 }
266 telemetry
267 }
268
269 pub fn merge_ollama_ps(&mut self, ps: &OllamaPsModel) {
273 if self.runtime_loaded_model.is_none() {
274 self.runtime_loaded_model = ps.name.clone();
275 }
276 if self.runtime_context_length.is_none() {
277 self.runtime_context_length = ps.context_length;
278 }
279 if self.runtime_memory_bytes.is_none() {
280 self.runtime_memory_bytes = ps.size_bytes;
281 }
282 if self.runtime_memory_vram_bytes.is_none() {
283 self.runtime_memory_vram_bytes = ps.size_vram_bytes;
284 }
285 if self.runtime_keep_alive_until.is_none() {
286 self.runtime_keep_alive_until = ps.expires_at.clone();
287 }
288 }
289
290 pub fn as_vm_dict(&self) -> Option<VmValue> {
294 if self.is_empty() {
295 return None;
296 }
297 let mut dict: BTreeMap<String, VmValue> = BTreeMap::new();
298 if !self.source.is_empty() {
299 dict.insert(
300 "source".to_string(),
301 VmValue::String(Rc::from(self.source.as_str())),
302 );
303 }
304 insert_opt_u64(&mut dict, "server_total_ms", self.server_total_ms);
305 insert_opt_u64(&mut dict, "server_load_ms", self.server_load_ms);
306 insert_opt_u64(
307 &mut dict,
308 "server_prompt_eval_ms",
309 self.server_prompt_eval_ms,
310 );
311 insert_opt_u64(&mut dict, "server_generation_ms", self.server_generation_ms);
312 insert_opt_i64(&mut dict, "server_prompt_tokens", self.server_prompt_tokens);
313 insert_opt_i64(&mut dict, "server_output_tokens", self.server_output_tokens);
314 insert_opt_u64(&mut dict, "client_wall_ms", self.client_wall_ms);
315 insert_opt_u64(
316 &mut dict,
317 "runtime_context_length",
318 self.runtime_context_length,
319 );
320 if let Some(ref model) = self.runtime_loaded_model {
321 dict.insert(
322 "runtime_loaded_model".to_string(),
323 VmValue::String(Rc::from(model.as_str())),
324 );
325 }
326 insert_opt_u64(&mut dict, "runtime_memory_bytes", self.runtime_memory_bytes);
327 insert_opt_u64(
328 &mut dict,
329 "runtime_memory_vram_bytes",
330 self.runtime_memory_vram_bytes,
331 );
332 if let Some(ref expires) = self.runtime_keep_alive_until {
333 dict.insert(
334 "runtime_keep_alive_until".to_string(),
335 VmValue::String(Rc::from(expires.as_str())),
336 );
337 }
338 if let Some(ref request_id) = self.request_id {
339 dict.insert(
340 "request_id".to_string(),
341 VmValue::String(Rc::from(request_id.as_str())),
342 );
343 }
344 Some(VmValue::Dict(Rc::new(dict)))
345 }
346}
347
#[derive(Clone, Debug, Default, PartialEq, Eq)]
/// Fields extracted from one Ollama `ps` entry, as consumed by
/// `ProviderTelemetry::merge_ollama_ps`. Every field is `None` when the entry
/// did not supply it.
pub struct OllamaPsModel {
    /// The entry's `name` string, or its `model` string when `name` is absent.
    pub name: Option<String>,
    /// The entry's `size` value.
    pub size_bytes: Option<u64>,
    /// The entry's `size_vram` value.
    pub size_vram_bytes: Option<u64>,
    /// The entry's `expires_at` string, kept verbatim.
    pub expires_at: Option<String>,
    /// The entry's `context_length`, or `details.context_length` as a fallback.
    pub context_length: Option<u64>,
}
358
359impl OllamaPsModel {
360 pub fn from_ps_entry(entry: &serde_json::Value) -> Option<Self> {
364 let name = entry
365 .get("name")
366 .and_then(serde_json::Value::as_str)
367 .or_else(|| entry.get("model").and_then(serde_json::Value::as_str))
368 .map(str::to_string);
369 let context_length = entry
370 .get("context_length")
371 .and_then(serde_json::Value::as_u64)
372 .or_else(|| {
373 entry
374 .get("details")
375 .and_then(|details| details.get("context_length"))
376 .and_then(serde_json::Value::as_u64)
377 });
378 Some(Self {
379 name,
380 size_bytes: entry.get("size").and_then(serde_json::Value::as_u64),
381 size_vram_bytes: entry.get("size_vram").and_then(serde_json::Value::as_u64),
382 expires_at: entry
383 .get("expires_at")
384 .and_then(serde_json::Value::as_str)
385 .map(str::to_string),
386 context_length,
387 })
388 }
389}
390
391fn ns_field(frame: &serde_json::Value, key: &str) -> Option<u64> {
392 frame
393 .get(key)
394 .and_then(serde_json::Value::as_u64)
395 .map(ProviderTelemetry::ns_to_ms)
396}
397
398fn ms_or_round(value: Option<&serde_json::Value>) -> Option<u64> {
399 let value = value?;
400 if let Some(n) = value.as_u64() {
401 return Some(n);
402 }
403 value.as_f64().map(|n| n.round().max(0.0) as u64)
404}
405
406fn insert_opt_u64(dict: &mut BTreeMap<String, VmValue>, key: &str, value: Option<u64>) {
407 if let Some(value) = value {
408 dict.insert(key.to_string(), VmValue::Int(value as i64));
409 }
410}
411
412fn insert_opt_i64(dict: &mut BTreeMap<String, VmValue>, key: &str, value: Option<i64>) {
413 if let Some(value) = value {
414 dict.insert(key.to_string(), VmValue::Int(value));
415 }
416}
417
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the payload of a `VmValue::Int`, or `None` for any other variant.
    fn int_of(value: &VmValue) -> Option<i64> {
        match value {
            VmValue::Int(n) => Some(*n),
            _ => None,
        }
    }

    #[test]
    fn ns_to_ms_truncates_sub_millisecond_remainder() {
        assert_eq!(ProviderTelemetry::ns_to_ms(0), 0);
        assert_eq!(ProviderTelemetry::ns_to_ms(999_999), 0);
        assert_eq!(ProviderTelemetry::ns_to_ms(1_000_000), 1);
        assert_eq!(ProviderTelemetry::ns_to_ms(1_999_999), 1);
    }

    #[test]
    fn ollama_done_frame_extracts_full_breakdown() {
        let frame = serde_json::json!({
            "model": "qwen3.6:35b-a3b-coding-nvfp4",
            "total_duration": 7_400_000_000u64,
            "load_duration": 400_000_000u64,
            "prompt_eval_duration": 1_200_000_000u64,
            "eval_duration": 5_800_000_000u64,
            "prompt_eval_count": 1024,
            "eval_count": 64
        });

        let telemetry = ProviderTelemetry::from_ollama_done(&frame, source::OLLAMA_CHAT);

        assert_eq!(telemetry.source, source::OLLAMA_CHAT);
        assert_eq!(telemetry.server_total_ms, Some(7400));
        assert_eq!(telemetry.server_load_ms, Some(400));
        assert_eq!(telemetry.server_prompt_eval_ms, Some(1200));
        assert_eq!(telemetry.server_generation_ms, Some(5800));
        assert_eq!(telemetry.server_prompt_tokens, Some(1024));
        assert_eq!(telemetry.server_output_tokens, Some(64));
        assert_eq!(
            telemetry.runtime_loaded_model.as_deref(),
            Some("qwen3.6:35b-a3b-coding-nvfp4")
        );
        assert!(!telemetry.is_empty());
    }

    #[test]
    fn ollama_done_frame_leaves_missing_fields_as_none() {
        let frame = serde_json::json!({
            "model": "qwen3.6:35b-a3b-coding-nvfp4",
        });

        let telemetry = ProviderTelemetry::from_ollama_done(&frame, source::OLLAMA_CHAT);

        assert_eq!(telemetry.server_total_ms, None);
        assert_eq!(telemetry.server_load_ms, None);
        assert_eq!(telemetry.server_prompt_eval_ms, None);
        assert_eq!(telemetry.server_generation_ms, None);
        assert_eq!(telemetry.server_prompt_tokens, None);
        assert_eq!(telemetry.server_output_tokens, None);
    }

    #[test]
    fn openai_usage_partial_extracts_counts_only() {
        let usage = serde_json::json!({
            "prompt_tokens": 200,
            "completion_tokens": 50
        });

        let telemetry = ProviderTelemetry::from_openai_usage(&usage, Some("req-abc"));

        assert_eq!(telemetry.source, source::OPENAI_USAGE);
        assert_eq!(telemetry.server_prompt_tokens, Some(200));
        assert_eq!(telemetry.server_output_tokens, Some(50));
        assert_eq!(telemetry.server_prompt_eval_ms, None);
        assert_eq!(telemetry.request_id.as_deref(), Some("req-abc"));
    }

    #[test]
    fn openai_usage_falls_back_to_input_output_token_keys() {
        let usage = serde_json::json!({
            "input_tokens": 11,
            "output_tokens": 5
        });

        let telemetry = ProviderTelemetry::from_openai_usage(&usage, None);

        assert_eq!(telemetry.source, source::OPENAI_USAGE);
        assert_eq!(telemetry.server_prompt_tokens, Some(11));
        assert_eq!(telemetry.server_output_tokens, Some(5));
        assert_eq!(telemetry.request_id, None);
    }

    #[test]
    fn llamacpp_timings_promotes_source_and_fills_durations() {
        let usage = serde_json::json!({
            "prompt_tokens": 220,
            "completion_tokens": 17,
            "timings": {
                "prompt_n": 200,
                "prompt_ms": 145.4,
                "predicted_n": 17,
                "predicted_ms": 89.1,
            }
        });

        let telemetry = ProviderTelemetry::from_openai_usage(&usage, None);

        assert_eq!(telemetry.source, source::LLAMACPP_TIMINGS);
        assert_eq!(telemetry.server_prompt_eval_ms, Some(145));
        assert_eq!(telemetry.server_generation_ms, Some(89));
        assert_eq!(telemetry.server_total_ms, Some(234));
        // `timings.prompt_n` overrides the top-level `prompt_tokens` count.
        assert_eq!(telemetry.server_prompt_tokens, Some(200));
        assert_eq!(telemetry.server_output_tokens, Some(17));
        assert!(!telemetry.is_empty());
    }

    #[test]
    fn anthropic_usage_extracts_counts_and_request_id() {
        let usage = serde_json::json!({
            "input_tokens": 12,
            "output_tokens": 34
        });

        let telemetry = ProviderTelemetry::from_anthropic_usage(&usage, Some("req-1"));

        assert_eq!(telemetry.source, source::ANTHROPIC_USAGE);
        assert_eq!(telemetry.server_prompt_tokens, Some(12));
        assert_eq!(telemetry.server_output_tokens, Some(34));
        assert_eq!(telemetry.request_id.as_deref(), Some("req-1"));
    }

    #[test]
    fn gemini_usage_ignores_empty_request_id() {
        let usage = serde_json::json!({
            "promptTokenCount": 7,
            "candidatesTokenCount": 9
        });

        let telemetry = ProviderTelemetry::from_gemini_usage(&usage, Some(""));

        assert_eq!(telemetry.source, source::GEMINI_USAGE);
        assert_eq!(telemetry.server_prompt_tokens, Some(7));
        assert_eq!(telemetry.server_output_tokens, Some(9));
        assert_eq!(telemetry.request_id, None);
    }

    #[test]
    fn ps_entry_pulls_context_length_from_top_level_or_details() {
        let entry = serde_json::json!({
            "name": "qwen3.6:35b-a3b-coding-nvfp4",
            "size": 4_700_000_000u64,
            "size_vram": 4_500_000_000u64,
            "expires_at": "2026-05-14T10:30:00Z",
            "context_length": 32768
        });
        let model = OllamaPsModel::from_ps_entry(&entry).expect("ps entry parses");
        assert_eq!(model.context_length, Some(32768));

        let entry_nested = serde_json::json!({
            "name": "qwen3.6:35b-a3b-coding-nvfp4",
            "details": {"context_length": 16384}
        });
        let nested = OllamaPsModel::from_ps_entry(&entry_nested).expect("ps entry parses");
        assert_eq!(nested.context_length, Some(16384));
    }

    #[test]
    fn ps_entry_falls_back_to_model_key_for_name() {
        let entry = serde_json::json!({
            "model": "fallback-model",
            "size": 10u64
        });

        let model = OllamaPsModel::from_ps_entry(&entry).expect("ps entry parses");

        assert_eq!(model.name.as_deref(), Some("fallback-model"));
        assert_eq!(model.size_bytes, Some(10));
        assert_eq!(model.size_vram_bytes, None);
        assert_eq!(model.expires_at, None);
        assert_eq!(model.context_length, None);
    }

    #[test]
    fn merge_ollama_ps_preserves_call_level_values() {
        let mut telemetry = ProviderTelemetry::new(source::OLLAMA_CHAT);
        telemetry.runtime_loaded_model = Some("real-model".to_string());
        let ps = OllamaPsModel {
            name: Some("alias-model".to_string()),
            size_bytes: Some(1),
            size_vram_bytes: Some(2),
            expires_at: Some("forever".to_string()),
            context_length: Some(8192),
        };
        telemetry.merge_ollama_ps(&ps);
        // The value set on the call wins over the `ps` entry's name.
        assert_eq!(
            telemetry.runtime_loaded_model.as_deref(),
            Some("real-model")
        );
        assert_eq!(telemetry.runtime_memory_bytes, Some(1));
        assert_eq!(telemetry.runtime_memory_vram_bytes, Some(2));
        assert_eq!(
            telemetry.runtime_keep_alive_until.as_deref(),
            Some("forever")
        );
        assert_eq!(telemetry.runtime_context_length, Some(8192));
    }

    #[test]
    fn as_vm_dict_returns_none_when_empty() {
        let telemetry = ProviderTelemetry::default();
        assert!(telemetry.is_empty());
        assert!(telemetry.as_vm_dict().is_none());
    }

    #[test]
    fn source_only_telemetry_is_not_empty() {
        let telemetry = ProviderTelemetry::new(source::OLLAMA_GENERATE);
        assert!(!telemetry.is_empty());

        let value = telemetry.as_vm_dict().expect("dict present");
        let dict = value.as_dict().expect("dict body");
        assert_eq!(
            dict.get("source").map(VmValue::display).as_deref(),
            Some(source::OLLAMA_GENERATE)
        );
    }

    #[test]
    fn as_vm_dict_serializes_all_present_fields() {
        let telemetry = ProviderTelemetry {
            source: source::OLLAMA_CHAT.to_string(),
            server_total_ms: Some(100),
            client_wall_ms: Some(120),
            runtime_loaded_model: Some("qwen".to_string()),
            ..Default::default()
        };
        let value = telemetry.as_vm_dict().expect("dict present");
        let dict = value.as_dict().expect("dict body");
        assert_eq!(
            dict.get("source").map(VmValue::display).as_deref(),
            Some(source::OLLAMA_CHAT)
        );
        assert_eq!(dict.get("server_total_ms").and_then(int_of), Some(100));
        assert_eq!(dict.get("client_wall_ms").and_then(int_of), Some(120));
        // Every field that was set must be present, including the model name.
        assert_eq!(
            dict.get("runtime_loaded_model")
                .map(VmValue::display)
                .as_deref(),
            Some("qwen")
        );
        // Fields that were left unset must not appear at all.
        assert!(dict.get("server_load_ms").is_none());
        assert!(dict.get("request_id").is_none());
    }
}