1use std::collections::BTreeMap;
19
20use crate::value::VmValue;
21
/// String tags stored in `ProviderTelemetry::source` to record which
/// provider payload a telemetry record was extracted from.
pub mod source {
    /// Tag for telemetry taken from an Ollama chat response; the unit tests in
    /// this file pass it to `ProviderTelemetry::from_ollama_done`.
    pub const OLLAMA_CHAT: &str = "ollama_chat";
    /// Tag for telemetry taken from an Ollama generate response.
    /// NOTE(review): not referenced in this file — presumably passed to
    /// `ProviderTelemetry::from_ollama_done` by callers; confirm.
    pub const OLLAMA_GENERATE: &str = "ollama_generate";
    /// Default tag set by `ProviderTelemetry::from_openai_usage`.
    pub const OPENAI_USAGE: &str = "openai_usage";
    /// Tag that `ProviderTelemetry::from_openai_usage` switches to when the
    /// usage payload carries a `timings` object.
    pub const LLAMACPP_TIMINGS: &str = "llamacpp_timings";
    /// Tag set by `ProviderTelemetry::from_anthropic_usage`.
    pub const ANTHROPIC_USAGE: &str = "anthropic_usage";
    /// Tag set by `ProviderTelemetry::from_gemini_usage`.
    pub const GEMINI_USAGE: &str = "gemini_usage";
    /// Fallback tag. NOTE(review): not referenced in this file — presumably
    /// used by callers when the provider cannot be identified; confirm.
    pub const UNKNOWN: &str = "unknown";
}
47
/// Returns the whole milliseconds elapsed since `started`.
///
/// `Duration::as_millis` yields a `u128`; a value too large for `u64` is
/// reported as `u64::MAX` instead of being truncated.
pub(crate) fn elapsed_ms(started: std::time::Instant) -> u64 {
    let millis: u128 = started.elapsed().as_millis();
    u64::try_from(millis).unwrap_or(u64::MAX)
}
51
/// Timing, token-count and runtime details reported by an LLM provider for a
/// single call.
///
/// Every measurement is optional. `None` fields are left out of the
/// serialized form, and an empty `source` is left out as well.
#[derive(Clone, Debug, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct ProviderTelemetry {
    /// Tag naming the payload the record was extracted from (see the `source`
    /// module). Defaults to the empty string when absent on deserialization.
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub source: String,
    /// Total server-side time in milliseconds: Ollama's `total_duration`, or
    /// the sum of prompt-eval and generation time when built from `timings`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub server_total_ms: Option<u64>,
    /// Milliseconds taken from Ollama's `load_duration`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub server_load_ms: Option<u64>,
    /// Prompt-evaluation time in milliseconds: Ollama's
    /// `prompt_eval_duration` or `timings.prompt_ms`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub server_prompt_eval_ms: Option<u64>,
    /// Generation time in milliseconds: Ollama's `eval_duration` or
    /// `timings.predicted_ms`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub server_generation_ms: Option<u64>,
    /// Prompt/input token count as reported by the provider.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub server_prompt_tokens: Option<i64>,
    /// Output/completion token count as reported by the provider.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub server_output_tokens: Option<i64>,
    /// Client-side wall-clock time in milliseconds.
    /// NOTE(review): never assigned in this file — presumably filled in by
    /// callers (e.g. from `elapsed_ms`); confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub client_wall_ms: Option<u64>,
    /// Context length of the loaded model, filled by `merge_ollama_ps`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub runtime_context_length: Option<u64>,
    /// Model name: the `model` field of an Ollama frame, or the name from an
    /// `OllamaPsModel` when the frame did not provide one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub runtime_loaded_model: Option<String>,
    /// Size of the loaded model in bytes, filled by `merge_ollama_ps`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub runtime_memory_bytes: Option<u64>,
    /// VRAM size of the loaded model in bytes, filled by `merge_ollama_ps`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub runtime_memory_vram_bytes: Option<u64>,
    /// Expiry string copied verbatim from `OllamaPsModel::expires_at`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub runtime_keep_alive_until: Option<String>,
    /// Provider request id supplied by the caller; never stored when empty.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub request_id: Option<String>,
}
114
115impl ProviderTelemetry {
116 pub fn new(source: &str) -> Self {
117 Self {
118 source: source.to_string(),
119 ..Self::default()
120 }
121 }
122
123 pub fn is_empty(&self) -> bool {
127 let Self {
128 source,
129 server_total_ms,
130 server_load_ms,
131 server_prompt_eval_ms,
132 server_generation_ms,
133 server_prompt_tokens,
134 server_output_tokens,
135 client_wall_ms,
136 runtime_context_length,
137 runtime_loaded_model,
138 runtime_memory_bytes,
139 runtime_memory_vram_bytes,
140 runtime_keep_alive_until,
141 request_id,
142 } = self;
143 source.is_empty()
144 && server_total_ms.is_none()
145 && server_load_ms.is_none()
146 && server_prompt_eval_ms.is_none()
147 && server_generation_ms.is_none()
148 && server_prompt_tokens.is_none()
149 && server_output_tokens.is_none()
150 && client_wall_ms.is_none()
151 && runtime_context_length.is_none()
152 && runtime_loaded_model.is_none()
153 && runtime_memory_bytes.is_none()
154 && runtime_memory_vram_bytes.is_none()
155 && runtime_keep_alive_until.is_none()
156 && request_id.is_none()
157 }
158
159 pub fn ns_to_ms(ns: u64) -> u64 {
163 ns / 1_000_000
167 }
168
169 pub fn from_ollama_done(frame: &serde_json::Value, source: &str) -> Self {
173 let mut telemetry = Self::new(source);
174 telemetry.server_total_ms = ns_field(frame, "total_duration");
175 telemetry.server_load_ms = ns_field(frame, "load_duration");
176 telemetry.server_prompt_eval_ms = ns_field(frame, "prompt_eval_duration");
177 telemetry.server_generation_ms = ns_field(frame, "eval_duration");
178 telemetry.server_prompt_tokens = frame
179 .get("prompt_eval_count")
180 .and_then(serde_json::Value::as_i64);
181 telemetry.server_output_tokens =
182 frame.get("eval_count").and_then(serde_json::Value::as_i64);
183 if let Some(model) = frame.get("model").and_then(serde_json::Value::as_str) {
184 telemetry.runtime_loaded_model = Some(model.to_string());
185 }
186 telemetry
187 }
188
189 pub fn from_openai_usage(usage: &serde_json::Value, request_id: Option<&str>) -> Self {
193 let mut telemetry = Self::new(source::OPENAI_USAGE);
194 telemetry.server_prompt_tokens = usage
195 .get("prompt_tokens")
196 .or_else(|| usage.get("input_tokens"))
197 .and_then(serde_json::Value::as_i64);
198 telemetry.server_output_tokens = usage
199 .get("completion_tokens")
200 .or_else(|| usage.get("output_tokens"))
201 .and_then(serde_json::Value::as_i64);
202 if let Some(timings) = usage.get("timings").filter(|value| value.is_object()) {
203 telemetry.source = source::LLAMACPP_TIMINGS.to_string();
204 telemetry.server_prompt_eval_ms = ms_or_round(timings.get("prompt_ms"));
205 telemetry.server_generation_ms = ms_or_round(timings.get("predicted_ms"));
206 if let Some(prefill) = timings.get("prompt_n").and_then(serde_json::Value::as_i64) {
211 telemetry.server_prompt_tokens = Some(prefill);
212 }
213 if let Some(predicted) = timings
214 .get("predicted_n")
215 .and_then(serde_json::Value::as_i64)
216 {
217 telemetry.server_output_tokens = Some(predicted);
218 }
219 let total = telemetry
220 .server_prompt_eval_ms
221 .unwrap_or(0)
222 .saturating_add(telemetry.server_generation_ms.unwrap_or(0));
223 if total > 0 {
224 telemetry.server_total_ms = Some(total);
225 }
226 }
227 if let Some(request_id) = request_id.filter(|value| !value.is_empty()) {
228 telemetry.request_id = Some(request_id.to_string());
229 }
230 telemetry
231 }
232
233 pub fn from_anthropic_usage(usage: &serde_json::Value, request_id: Option<&str>) -> Self {
238 let mut telemetry = Self::new(source::ANTHROPIC_USAGE);
239 telemetry.server_prompt_tokens = usage
240 .get("input_tokens")
241 .and_then(serde_json::Value::as_i64);
242 telemetry.server_output_tokens = usage
243 .get("output_tokens")
244 .and_then(serde_json::Value::as_i64);
245 if let Some(request_id) = request_id.filter(|value| !value.is_empty()) {
246 telemetry.request_id = Some(request_id.to_string());
247 }
248 telemetry
249 }
250
251 pub fn from_gemini_usage(usage: &serde_json::Value, request_id: Option<&str>) -> Self {
255 let mut telemetry = Self::new(source::GEMINI_USAGE);
256 telemetry.server_prompt_tokens = usage
257 .get("promptTokenCount")
258 .and_then(serde_json::Value::as_i64);
259 telemetry.server_output_tokens = usage
260 .get("candidatesTokenCount")
261 .and_then(serde_json::Value::as_i64);
262 if let Some(request_id) = request_id.filter(|value| !value.is_empty()) {
263 telemetry.request_id = Some(request_id.to_string());
264 }
265 telemetry
266 }
267
268 pub fn merge_ollama_ps(&mut self, ps: &OllamaPsModel) {
272 if self.runtime_loaded_model.is_none() {
273 self.runtime_loaded_model = ps.name.clone();
274 }
275 if self.runtime_context_length.is_none() {
276 self.runtime_context_length = ps.context_length;
277 }
278 if self.runtime_memory_bytes.is_none() {
279 self.runtime_memory_bytes = ps.size_bytes;
280 }
281 if self.runtime_memory_vram_bytes.is_none() {
282 self.runtime_memory_vram_bytes = ps.size_vram_bytes;
283 }
284 if self.runtime_keep_alive_until.is_none() {
285 self.runtime_keep_alive_until = ps.expires_at.clone();
286 }
287 }
288
289 pub fn as_vm_dict(&self) -> Option<VmValue> {
293 if self.is_empty() {
294 return None;
295 }
296 let mut dict: BTreeMap<String, VmValue> = BTreeMap::new();
297 if !self.source.is_empty() {
298 dict.insert(
299 "source".to_string(),
300 VmValue::String(std::sync::Arc::from(self.source.as_str())),
301 );
302 }
303 insert_opt_u64(&mut dict, "server_total_ms", self.server_total_ms);
304 insert_opt_u64(&mut dict, "server_load_ms", self.server_load_ms);
305 insert_opt_u64(
306 &mut dict,
307 "server_prompt_eval_ms",
308 self.server_prompt_eval_ms,
309 );
310 insert_opt_u64(&mut dict, "server_generation_ms", self.server_generation_ms);
311 insert_opt_i64(&mut dict, "server_prompt_tokens", self.server_prompt_tokens);
312 insert_opt_i64(&mut dict, "server_output_tokens", self.server_output_tokens);
313 insert_opt_u64(&mut dict, "client_wall_ms", self.client_wall_ms);
314 insert_opt_u64(
315 &mut dict,
316 "runtime_context_length",
317 self.runtime_context_length,
318 );
319 if let Some(ref model) = self.runtime_loaded_model {
320 dict.insert(
321 "runtime_loaded_model".to_string(),
322 VmValue::String(std::sync::Arc::from(model.as_str())),
323 );
324 }
325 insert_opt_u64(&mut dict, "runtime_memory_bytes", self.runtime_memory_bytes);
326 insert_opt_u64(
327 &mut dict,
328 "runtime_memory_vram_bytes",
329 self.runtime_memory_vram_bytes,
330 );
331 if let Some(ref expires) = self.runtime_keep_alive_until {
332 dict.insert(
333 "runtime_keep_alive_until".to_string(),
334 VmValue::String(std::sync::Arc::from(expires.as_str())),
335 );
336 }
337 if let Some(ref request_id) = self.request_id {
338 dict.insert(
339 "request_id".to_string(),
340 VmValue::String(std::sync::Arc::from(request_id.as_str())),
341 );
342 }
343 Some(VmValue::Dict(std::sync::Arc::new(dict)))
344 }
345}
346
/// Runtime details of one loaded model, as parsed by
/// `OllamaPsModel::from_ps_entry` and merged into a `ProviderTelemetry` by
/// `ProviderTelemetry::merge_ollama_ps`.
/// NOTE(review): presumably one element of Ollama's `ps` model listing —
/// confirm against the caller.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct OllamaPsModel {
    /// Model name, read from the entry's `name` key or, failing that, `model`.
    pub name: Option<String>,
    /// Value of the entry's `size` key.
    pub size_bytes: Option<u64>,
    /// Value of the entry's `size_vram` key.
    pub size_vram_bytes: Option<u64>,
    /// Value of the entry's `expires_at` key, kept as the raw string.
    pub expires_at: Option<String>,
    /// Context length, read from the top-level `context_length` key or,
    /// failing that, `details.context_length`.
    pub context_length: Option<u64>,
}
357
358impl OllamaPsModel {
359 pub fn from_ps_entry(entry: &serde_json::Value) -> Option<Self> {
363 let name = entry
364 .get("name")
365 .and_then(serde_json::Value::as_str)
366 .or_else(|| entry.get("model").and_then(serde_json::Value::as_str))
367 .map(str::to_string);
368 let context_length = entry
369 .get("context_length")
370 .and_then(serde_json::Value::as_u64)
371 .or_else(|| {
372 entry
373 .get("details")
374 .and_then(|details| details.get("context_length"))
375 .and_then(serde_json::Value::as_u64)
376 });
377 Some(Self {
378 name,
379 size_bytes: entry.get("size").and_then(serde_json::Value::as_u64),
380 size_vram_bytes: entry.get("size_vram").and_then(serde_json::Value::as_u64),
381 expires_at: entry
382 .get("expires_at")
383 .and_then(serde_json::Value::as_str)
384 .map(str::to_string),
385 context_length,
386 })
387 }
388}
389
390fn ns_field(frame: &serde_json::Value, key: &str) -> Option<u64> {
391 frame
392 .get(key)
393 .and_then(serde_json::Value::as_u64)
394 .map(ProviderTelemetry::ns_to_ms)
395}
396
397fn ms_or_round(value: Option<&serde_json::Value>) -> Option<u64> {
398 let value = value?;
399 if let Some(n) = value.as_u64() {
400 return Some(n);
401 }
402 value.as_f64().map(|n| n.round().max(0.0) as u64)
403}
404
405fn insert_opt_u64(dict: &mut BTreeMap<String, VmValue>, key: &str, value: Option<u64>) {
406 if let Some(value) = value {
407 dict.insert(key.to_string(), VmValue::Int(value as i64));
408 }
409}
410
411fn insert_opt_i64(dict: &mut BTreeMap<String, VmValue>, key: &str, value: Option<i64>) {
412 if let Some(value) = value {
413 dict.insert(key.to_string(), VmValue::Int(value));
414 }
415}
416
#[cfg(test)]
mod tests {
    use super::*;

    /// A complete Ollama frame fills every server-side field, with the
    /// nanosecond durations reported as milliseconds.
    #[test]
    fn ollama_done_frame_extracts_full_breakdown() {
        let done = serde_json::json!({
            "model": "devstral-small-2:24b",
            "total_duration": 7_400_000_000u64,
            "load_duration": 400_000_000u64,
            "prompt_eval_duration": 1_200_000_000u64,
            "eval_duration": 5_800_000_000u64,
            "prompt_eval_count": 1024,
            "eval_count": 64
        });

        let parsed = ProviderTelemetry::from_ollama_done(&done, source::OLLAMA_CHAT);

        assert!(!parsed.is_empty());
        assert_eq!(parsed.source, source::OLLAMA_CHAT);
        assert_eq!(
            parsed.runtime_loaded_model.as_deref(),
            Some("devstral-small-2:24b")
        );
        assert_eq!(parsed.server_total_ms, Some(7400));
        assert_eq!(parsed.server_load_ms, Some(400));
        assert_eq!(parsed.server_prompt_eval_ms, Some(1200));
        assert_eq!(parsed.server_generation_ms, Some(5800));
        assert_eq!(parsed.server_prompt_tokens, Some(1024));
        assert_eq!(parsed.server_output_tokens, Some(64));
    }

    /// Keys absent from the frame leave the matching fields unset.
    #[test]
    fn ollama_done_frame_leaves_missing_fields_as_none() {
        let done = serde_json::json!({
            "model": "devstral-small-2:24b",
        });

        let parsed = ProviderTelemetry::from_ollama_done(&done, source::OLLAMA_CHAT);

        assert!(parsed.server_total_ms.is_none());
        assert!(parsed.server_load_ms.is_none());
        assert!(parsed.server_prompt_eval_ms.is_none());
        assert!(parsed.server_generation_ms.is_none());
        assert!(parsed.server_prompt_tokens.is_none());
        assert!(parsed.server_output_tokens.is_none());
    }

    /// Without `timings`, an OpenAI usage object yields counts and the
    /// request id only.
    #[test]
    fn openai_usage_partial_extracts_counts_only() {
        let counts_only = serde_json::json!({
            "prompt_tokens": 200,
            "completion_tokens": 50
        });

        let parsed = ProviderTelemetry::from_openai_usage(&counts_only, Some("req-abc"));

        assert_eq!(parsed.source, source::OPENAI_USAGE);
        assert_eq!(parsed.server_prompt_tokens, Some(200));
        assert_eq!(parsed.server_output_tokens, Some(50));
        assert!(parsed.server_prompt_eval_ms.is_none());
        assert_eq!(parsed.request_id.as_deref(), Some("req-abc"));
    }

    /// A `timings` object re-tags the record, supplies rounded durations and
    /// overrides the usage token counts.
    #[test]
    fn llamacpp_timings_promotes_source_and_fills_durations() {
        let with_timings = serde_json::json!({
            "prompt_tokens": 220,
            "completion_tokens": 17,
            "timings": {
                "prompt_n": 200,
                "prompt_ms": 145.4,
                "predicted_n": 17,
                "predicted_ms": 89.1,
            }
        });

        let parsed = ProviderTelemetry::from_openai_usage(&with_timings, None);

        assert!(!parsed.is_empty());
        assert_eq!(parsed.source, source::LLAMACPP_TIMINGS);
        assert_eq!(parsed.server_prompt_eval_ms, Some(145));
        assert_eq!(parsed.server_generation_ms, Some(89));
        assert_eq!(parsed.server_total_ms, Some(234));
        assert_eq!(parsed.server_prompt_tokens, Some(200));
        assert_eq!(parsed.server_output_tokens, Some(17));
    }

    /// The context length is found at the top level or nested under `details`.
    #[test]
    fn ps_entry_pulls_context_length_from_top_level_or_details() {
        let top_level = serde_json::json!({
            "name": "devstral-small-2:24b",
            "size": 4_700_000_000u64,
            "size_vram": 4_500_000_000u64,
            "expires_at": "2026-05-14T10:30:00Z",
            "context_length": 32768
        });
        let from_top_level = OllamaPsModel::from_ps_entry(&top_level).expect("ps entry parses");
        assert_eq!(from_top_level.context_length, Some(32768));

        let under_details = serde_json::json!({
            "name": "devstral-small-2:24b",
            "details": {"context_length": 16384}
        });
        let from_details = OllamaPsModel::from_ps_entry(&under_details).expect("ps entry parses");
        assert_eq!(from_details.context_length, Some(16384));
    }

    /// Merging fills only unset fields; a model name already on the record
    /// is kept.
    #[test]
    fn merge_ollama_ps_preserves_call_level_values() {
        let mut record = ProviderTelemetry {
            runtime_loaded_model: Some("real-model".to_string()),
            ..ProviderTelemetry::new(source::OLLAMA_CHAT)
        };
        let listing = OllamaPsModel {
            name: Some("alias-model".to_string()),
            size_bytes: Some(1),
            size_vram_bytes: Some(2),
            expires_at: Some("forever".to_string()),
            context_length: Some(8192),
        };

        record.merge_ollama_ps(&listing);

        assert_eq!(record.runtime_loaded_model.as_deref(), Some("real-model"));
        assert_eq!(record.runtime_memory_bytes, Some(1));
        assert_eq!(record.runtime_memory_vram_bytes, Some(2));
        assert_eq!(record.runtime_keep_alive_until.as_deref(), Some("forever"));
        assert_eq!(record.runtime_context_length, Some(8192));
    }

    /// A default record is empty and produces no dict.
    #[test]
    fn as_vm_dict_returns_none_when_empty() {
        let blank = ProviderTelemetry::default();
        assert!(blank.is_empty());
        assert!(blank.as_vm_dict().is_none());
    }

    /// Populated fields appear in the dict under their field names.
    #[test]
    fn as_vm_dict_serializes_all_present_fields() {
        let record = ProviderTelemetry {
            source: source::OLLAMA_CHAT.to_string(),
            server_total_ms: Some(100),
            client_wall_ms: Some(120),
            runtime_loaded_model: Some("qwen".to_string()),
            ..Default::default()
        };
        let value = record.as_vm_dict().expect("dict present");
        let dict = value.as_dict().expect("dict body");
        let int_at = |key: &str| match dict.get(key) {
            Some(VmValue::Int(n)) => Some(*n),
            _ => None,
        };

        assert_eq!(
            dict.get("source").map(VmValue::display).as_deref(),
            Some(source::OLLAMA_CHAT)
        );
        assert_eq!(int_at("server_total_ms"), Some(100));
        assert_eq!(int_at("client_wall_ms"), Some(120));
    }
}