1use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6
/// Token, timing and cost metrics recorded for one model invocation.
///
/// A record is created with `LLMMetrics::new` and then filled in through the
/// chained `with_*` builder methods. Every field is public, so callers may
/// also read or assign them directly. The type derives `Serialize` and
/// `Deserialize`, so field names are part of the serialized representation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LLMMetrics {
    /// Token count on the prompt side; `estimate_cost` bills it at the prompt rate.
    pub prompt_tokens: u32,
    /// Token count on the completion side; `estimate_cost` bills it at the
    /// completion rate and `with_latency` uses it to derive throughput.
    pub completion_tokens: u32,
    /// Sum of prompt and completion tokens as set by `with_tokens`.
    pub total_tokens: u32,
    /// Time to first token in milliseconds, stored verbatim by `with_ttft`.
    pub time_to_first_token_ms: f64,
    /// Completion tokens per second, derived by `with_latency` as
    /// `completion_tokens / (latency_ms / 1000)`; stays `0.0` until it can be computed.
    pub tokens_per_second: f64,
    /// Request latency in milliseconds, stored verbatim by `with_latency`.
    pub latency_ms: f64,
    /// Cost in USD supplied by the caller through `with_cost`; `None` until set.
    /// `estimate_cost` neither reads nor writes this field.
    pub cost_usd: Option<f64>,
    /// Model identifier; `estimate_cost` substring-matches it against its pricing table.
    pub model_name: String,
    /// UTC instant captured when the record was created by `new`.
    pub timestamp: DateTime<Utc>,
    /// Optional request identifier set by `with_request_id`; `None` until set.
    pub request_id: Option<String>,
    /// Free-form string key/value labels added through `with_tag`.
    pub tags: HashMap<String, String>,
}
33
34impl LLMMetrics {
35 pub fn new(model_name: &str) -> Self {
37 Self {
38 prompt_tokens: 0,
39 completion_tokens: 0,
40 total_tokens: 0,
41 time_to_first_token_ms: 0.0,
42 tokens_per_second: 0.0,
43 latency_ms: 0.0,
44 cost_usd: None,
45 model_name: model_name.to_string(),
46 timestamp: Utc::now(),
47 request_id: None,
48 tags: HashMap::new(),
49 }
50 }
51
52 pub fn with_tokens(mut self, prompt: u32, completion: u32) -> Self {
54 self.prompt_tokens = prompt;
55 self.completion_tokens = completion;
56 self.total_tokens = prompt + completion;
57 self
58 }
59
60 pub fn with_latency(mut self, latency_ms: f64) -> Self {
62 self.latency_ms = latency_ms;
63 if latency_ms > 0.0 && self.completion_tokens > 0 {
64 self.tokens_per_second = f64::from(self.completion_tokens) / (latency_ms / 1000.0);
65 }
66 self
67 }
68
69 pub fn with_ttft(mut self, ttft_ms: f64) -> Self {
71 self.time_to_first_token_ms = ttft_ms;
72 self
73 }
74
75 pub fn with_cost(mut self, cost_usd: f64) -> Self {
77 self.cost_usd = Some(cost_usd);
78 self
79 }
80
81 pub fn with_request_id(mut self, id: &str) -> Self {
83 self.request_id = Some(id.to_string());
84 self
85 }
86
87 pub fn with_tag(mut self, key: &str, value: &str) -> Self {
89 self.tags.insert(key.to_string(), value.to_string());
90 self
91 }
92
93 pub fn estimate_cost(&self) -> f64 {
100 const PRICING: &[(&str, f64, f64)] = &[
104 ("gpt-4-turbo", 0.01, 0.03),
105 ("gpt-4o", 0.005, 0.015),
106 ("gpt-4", 0.03, 0.06),
107 ("gpt-3.5", 0.0005, 0.0015),
108 ("claude-3-opus", 0.015, 0.075),
109 ("claude-3-sonnet", 0.003, 0.015),
110 ("claude-3-haiku", 0.00025, 0.00125),
111 ("gemini", 0.00025, 0.0005),
112 ("mistral", 0.0002, 0.0006),
113 ("llama", 0.0002, 0.0006),
114 ];
115
116 let (prompt_price, completion_price) = PRICING
117 .iter()
118 .find(|(pattern, _, _)| self.model_name.contains(pattern))
119 .map_or_else(
120 || {
121 eprintln!(
122 "Warning: unknown model '{}' for cost estimation, using conservative default \
123 ($0.001/$0.002 per 1K tokens)",
124 self.model_name
125 );
126 (0.001, 0.002)
127 },
128 |&(_, p, c)| (p, c),
129 );
130
131 let prompt_cost = (f64::from(self.prompt_tokens) / 1000.0) * prompt_price;
132 let completion_cost = (f64::from(self.completion_tokens) / 1000.0) * completion_price;
133 prompt_cost + completion_cost
134 }
135}
136
#[cfg(test)]
mod tests {
    use super::*;

    /// Estimated cost for `model` with 1000 prompt and 1000 completion tokens.
    fn cost_for_1k_each(model: &str) -> f64 {
        LLMMetrics::new(model).with_tokens(1000, 1000).estimate_cost()
    }

    /// Returns `true` when `actual` lies strictly within `tol` of `expected`.
    fn near(actual: f64, expected: f64, tol: f64) -> bool {
        (actual - expected).abs() < tol
    }

    /// A fresh record carries the model name and zeroed/empty everything else.
    #[test]
    fn test_llm_metrics_new() {
        let m = LLMMetrics::new("gpt-4");
        assert_eq!(m.model_name, "gpt-4");
        assert_eq!((m.prompt_tokens, m.completion_tokens, m.total_tokens), (0, 0, 0));
        assert!(m.cost_usd.is_none());
        assert!(m.request_id.is_none());
        assert!(m.tags.is_empty());
    }

    /// `with_tokens` stores both counts and their sum.
    #[test]
    fn test_llm_metrics_with_tokens() {
        let m = LLMMetrics::new("gpt-4").with_tokens(100, 50);
        assert_eq!((m.prompt_tokens, m.completion_tokens, m.total_tokens), (100, 50, 150));
    }

    /// 100 completion tokens over one second is 100 tokens per second.
    #[test]
    fn test_llm_metrics_with_latency() {
        let m = LLMMetrics::new("gpt-4").with_tokens(100, 100).with_latency(1000.0);
        assert!(near(m.latency_ms, 1000.0, 1e-9));
        assert!(near(m.tokens_per_second, 100.0, 1e-6));
    }

    /// Zero latency must not produce a throughput value.
    #[test]
    fn test_llm_metrics_with_latency_zero() {
        let m = LLMMetrics::new("gpt-4").with_tokens(100, 100).with_latency(0.0);
        assert!(near(m.latency_ms, 0.0, 1e-9));
        assert!(near(m.tokens_per_second, 0.0, 1e-9));
    }

    /// `with_ttft` stores the value unchanged.
    #[test]
    fn test_llm_metrics_with_ttft() {
        let m = LLMMetrics::new("gpt-4").with_ttft(150.0);
        assert!(near(m.time_to_first_token_ms, 150.0, 1e-9));
    }

    /// `with_cost` wraps the value in `Some`.
    #[test]
    fn test_llm_metrics_with_cost() {
        let m = LLMMetrics::new("gpt-4").with_cost(0.05);
        assert_eq!(m.cost_usd, Some(0.05));
    }

    /// `with_request_id` stores an owned copy of the id.
    #[test]
    fn test_llm_metrics_with_request_id() {
        let m = LLMMetrics::new("gpt-4").with_request_id("req-12345");
        assert_eq!(m.request_id.as_deref(), Some("req-12345"));
    }

    /// Multiple tags accumulate under their own keys.
    #[test]
    fn test_llm_metrics_with_tag() {
        let m = LLMMetrics::new("gpt-4")
            .with_tag("environment", "production")
            .with_tag("user_id", "user123");
        let tag = |key: &str| m.tags.get(key).map(String::as_str);
        assert_eq!(tag("environment"), Some("production"));
        assert_eq!(tag("user_id"), Some("user123"));
    }

    #[test]
    fn test_llm_metrics_estimate_cost_gpt4() {
        assert!(near(cost_for_1k_each("gpt-4"), 0.09, 0.001));
    }

    #[test]
    fn test_llm_metrics_estimate_cost_gpt4_turbo() {
        assert!(near(cost_for_1k_each("gpt-4-turbo"), 0.04, 0.001));
    }

    #[test]
    fn test_llm_metrics_estimate_cost_gpt35() {
        assert!(near(cost_for_1k_each("gpt-3.5-turbo"), 0.002, 0.0001));
    }

    #[test]
    fn test_llm_metrics_estimate_cost_claude_opus() {
        assert!(near(cost_for_1k_each("claude-3-opus"), 0.09, 0.001));
    }

    #[test]
    fn test_llm_metrics_estimate_cost_claude_sonnet() {
        assert!(near(cost_for_1k_each("claude-3-sonnet"), 0.018, 0.001));
    }

    #[test]
    fn test_llm_metrics_estimate_cost_claude_haiku() {
        assert!(near(cost_for_1k_each("claude-3-haiku"), 0.0015, 0.0001));
    }

    /// Unmatched model names fall back to the default rate.
    #[test]
    fn test_llm_metrics_estimate_cost_unknown_model() {
        assert!(near(cost_for_1k_each("some-unknown-model"), 0.003, 0.001));
    }

    /// A clone carries the same field values as its source.
    #[test]
    fn test_llm_metrics_clone() {
        let original = LLMMetrics::new("gpt-4").with_tokens(100, 50).with_latency(500.0);
        let copy = original.clone();
        assert_eq!(original.model_name, copy.model_name);
        assert_eq!(original.prompt_tokens, copy.prompt_tokens);
    }

    /// A JSON round trip preserves the inspected fields.
    #[test]
    fn test_llm_metrics_serde() {
        let before = LLMMetrics::new("gpt-4")
            .with_tokens(100, 50)
            .with_latency(500.0)
            .with_cost(0.01);

        let encoded = serde_json::to_string(&before).expect("JSON serialization should succeed");
        let after: LLMMetrics =
            serde_json::from_str(&encoded).expect("JSON deserialization should succeed");

        assert_eq!(before.model_name, after.model_name);
        assert_eq!(before.prompt_tokens, after.prompt_tokens);
        assert_eq!(before.cost_usd, after.cost_usd);
    }

    /// The derived `Debug` output names the type and shows the model.
    #[test]
    fn test_llm_metrics_debug() {
        let metrics = LLMMetrics::new("gpt-4");
        let rendered = format!("{metrics:?}");
        for needle in ["LLMMetrics", "gpt-4"] {
            assert!(rendered.contains(needle));
        }
    }

    /// Every builder can be chained and each leaves its own field set.
    #[test]
    fn test_llm_metrics_chained_builders() {
        let m = LLMMetrics::new("claude-3-opus")
            .with_tokens(500, 200)
            .with_latency(2000.0)
            .with_ttft(100.0)
            .with_cost(0.05)
            .with_request_id("req-abc")
            .with_tag("feature", "summarization");

        assert_eq!(m.model_name, "claude-3-opus");
        assert_eq!(m.total_tokens, 700);
        assert!(near(m.latency_ms, 2000.0, 1e-9));
        assert!(near(m.time_to_first_token_ms, 100.0, 1e-9));
        assert_eq!(m.cost_usd, Some(0.05));
        assert_eq!(m.request_id.as_deref(), Some("req-abc"));
        assert_eq!(m.tags.get("feature").map(String::as_str), Some("summarization"));
    }

    /// "gpt-4-turbo" must be matched before the shorter "gpt-4" pattern.
    #[test]
    fn test_falsify_n07_gpt4_turbo_before_gpt4() {
        let prompt_only_cost =
            |model: &str| LLMMetrics::new(model).with_tokens(1000, 0).estimate_cost();

        let turbo_cost = prompt_only_cost("gpt-4-turbo-preview");
        let base_cost = prompt_only_cost("gpt-4-0613");

        assert!(
            turbo_cost < base_cost,
            "gpt-4-turbo-preview ({turbo_cost}) must be cheaper than gpt-4 ({base_cost})"
        );
    }

    /// "gpt-4o" must not be priced as plain "gpt-4".
    #[test]
    fn test_falsify_n07_gpt4o_distinct_from_gpt4() {
        let gpt4o_cost = cost_for_1k_each("gpt-4o-2024-05-13");
        let gpt4_cost = cost_for_1k_each("gpt-4-0613");

        assert!(
            gpt4o_cost < gpt4_cost,
            "gpt-4o ({gpt4o_cost}) must be cheaper than gpt-4 ({gpt4_cost})"
        );
    }

    /// Unknown models get the non-zero default rate rather than a free ride.
    #[test]
    fn test_falsify_n07_unknown_model_uses_conservative_default() {
        let cost = cost_for_1k_each("totally-unknown-model-v9");

        assert!(cost > 0.0, "Unknown model cost must be > 0, got {cost}");
        assert!(near(cost, 0.003, 1e-6), "Expected conservative default ~$0.003, got {cost}");
    }

    /// Table-driven check of every pricing entry plus the fallback.
    #[test]
    fn test_estimate_cost_all_model_variants() {
        let models = [
            ("gpt-4-turbo-preview", 0.01 + 0.03),
            ("gpt-4o-2024-05-13", 0.005 + 0.015),
            ("gpt-4-0613", 0.03 + 0.06),
            ("gpt-3.5-turbo", 0.0005 + 0.0015),
            ("claude-3-opus-20240229", 0.015 + 0.075),
            ("claude-3-sonnet-20240229", 0.003 + 0.015),
            ("claude-3-haiku-20240307", 0.00025 + 0.00125),
            ("gemini-pro", 0.00025 + 0.0005),
            ("mistral-medium", 0.0002 + 0.0006),
            ("llama-3-70b", 0.0002 + 0.0006),
            ("unknown-model", 0.001 + 0.002),
        ];

        for &(model_name, expected_cost) in &models {
            let cost = cost_for_1k_each(model_name);
            assert!(
                near(cost, expected_cost, 1e-6),
                "cost mismatch for {model_name}: got {cost}, expected {expected_cost}"
            );
        }
    }
}