1use std::collections::HashMap;
2
3use serde::{Deserialize, Serialize};
4
5use crate::{Database, DbResultExt};
6use roboticus_core::Result;
7
/// Average grade split by whether memories were injected into the turn.
///
/// NOTE(review): `compute_quality_for_model` currently always fills both
/// fields with 0.0 — the real split is not implemented yet.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryImpact {
    /// Average grade for turns that had memories in context.
    pub with_memory: f64,
    /// Average grade for turns without memories.
    pub without_memory: f64,
}
13
/// Quality metrics for one model, derived from graded turn feedback.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityMetrics {
    /// Mean grade across all graded turns.
    pub avg_grade: f64,
    /// Number of graded turns.
    pub grade_count: i64,
    /// Fraction of the model's turns that received a grade (0.0-1.0).
    pub grade_coverage: f64,
    /// Total cost divided by total grade points.
    pub cost_per_quality_point: f64,
    /// Average grade keyed by task complexity.
    /// NOTE(review): currently always left empty by `compute_quality_for_model`.
    pub by_complexity: HashMap<String, f64>,
    pub memory_impact: MemoryImpact,
    /// "increasing" / "decreasing" / "stable" (see `trend_label`).
    pub trend: String,
}
24
/// Per-model efficiency summary produced by `compute_efficiency`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelEfficiency {
    pub model: String,
    /// Inference calls recorded for the model in the period.
    pub total_turns: i64,
    /// Mean tokens_out / tokens_in ratio.
    pub avg_output_density: f64,
    /// NOTE(review): not computed yet — always 0.0 in `compute_efficiency`.
    pub avg_budget_utilization: f64,
    /// NOTE(review): not computed yet — always 0.0.
    pub avg_memory_roi: f64,
    /// NOTE(review): not computed yet — always 0.0.
    pub avg_system_prompt_weight: f64,
    /// Fraction of turns served from cache (0.0-1.0).
    pub cache_hit_rate: f64,
    /// NOTE(review): not computed yet — always 0.0.
    pub context_pressure_rate: f64,
    pub cost: CostMetrics,
    pub trend: TrendMetrics,
    /// Present only when graded feedback exists for the model.
    pub quality: Option<QualityMetrics>,
}
39
/// Cost figures for one model; monetary values are rounded to 6 decimals.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CostMetrics {
    /// Total spend in the period.
    pub total: f64,
    /// Total cost divided by total output tokens (0.0 when no output).
    pub per_output_token: f64,
    /// Average cost per turn.
    pub effective_per_turn: f64,
    /// Estimated savings from cached turns (heuristic, see `compute_efficiency`).
    pub cache_savings: f64,
    /// Trend label; mirrors `TrendMetrics::cost_per_turn`.
    pub cumulative_trend: String,
    pub attribution: CostAttribution,
    /// NOTE(review): not computed yet — always 0.0.
    pub wasted_budget_cost: f64,
}
50
/// Breakdown of input-side cost by context component.
///
/// NOTE(review): `compute_efficiency` currently books 100% of input cost
/// under `history`; system prompt and memory shares are placeholders (zeros).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CostAttribution {
    pub system_prompt: AttributionDetail,
    pub memories: AttributionDetail,
    pub history: AttributionDetail,
}
57
/// Token count, cost, and percentage share for one attribution bucket.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttributionDetail {
    pub tokens: i64,
    pub cost: f64,
    /// Share of attributed input cost, expressed 0-100.
    pub pct: f64,
}
64
/// Direction labels ("increasing" / "decreasing" / "stable") comparing the
/// first half of the period's time series against the second half.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrendMetrics {
    pub output_density: String,
    pub cost_per_turn: String,
    pub cache_hit_rate: String,
}
71
/// One (day, model) aggregate in the report's time series.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimeSeriesPoint {
    /// Day bucket in `%Y-%m-%d` form (see the `strftime` in `compute_efficiency`).
    pub bucket: String,
    pub model: String,
    /// Mean tokens_out / tokens_in for the bucket.
    pub output_density: f64,
    /// Total cost in the bucket.
    pub cost: f64,
    /// Number of turns in the bucket.
    pub turns: i64,
    /// NOTE(review): not tracked at this granularity — always 0.0.
    pub budget_utilization: f64,
    /// Cached turns in the bucket.
    pub cached_count: i64,
}
82
/// Cross-model rollups for the report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EfficiencyTotals {
    pub total_cost: f64,
    pub total_cache_savings: f64,
    pub total_turns: i64,
    /// Model with the highest total spend; `None` when there are no models.
    pub most_expensive_model: Option<String>,
    /// Model with the highest average output density; `None` when empty.
    pub most_efficient_model: Option<String>,
    /// Name of the most expensive model, or "none" when there are no models.
    pub biggest_cost_driver: String,
}
92
/// Top-level efficiency report returned by `compute_efficiency`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EfficiencyReport {
    /// The period keyword the report was computed for (e.g. "7d").
    pub period: String,
    /// Per-model summaries keyed by model name.
    pub models: HashMap<String, ModelEfficiency>,
    /// Daily per-model aggregates, ordered by bucket.
    pub time_series: Vec<TimeSeriesPoint>,
    pub totals: EfficiencyTotals,
}
100
/// Round a value to six decimal places (used for monetary figures in reports).
fn round6(v: f64) -> f64 {
    const SCALE: f64 = 1_000_000.0;
    (v * SCALE).round() / SCALE
}
105
/// Map a report period keyword to a SQLite datetime expression marking the
/// start of that period. Unrecognized keywords (including "all") fall back to
/// the Unix epoch, i.e. no effective cutoff.
fn cutoff_expr(period: &str) -> &'static str {
    const PERIODS: [(&str, &str); 4] = [
        ("1h", "datetime('now', '-1 hour')"),
        ("24h", "datetime('now', '-1 day')"),
        ("7d", "datetime('now', '-7 days')"),
        ("30d", "datetime('now', '-30 days')"),
    ];
    PERIODS
        .iter()
        .find(|(key, _)| *key == period)
        .map(|(_, expr)| *expr)
        .unwrap_or("datetime('1970-01-01')")
}
115
/// One row of the per-model aggregate query inside `compute_efficiency`.
struct RawModelRow {
    model: String,
    /// COUNT(*) of inference calls in the period.
    total_turns: i64,
    /// AVG(tokens_out / tokens_in); 0.0 when SQL returned NULL.
    avg_output_density: f64,
    total_cost: f64,
    total_tokens_out: i64,
    total_tokens_in: i64,
    /// Turns flagged `cached = 1`.
    cached_count: i64,
    avg_cost_per_turn: f64,
}
126
/// Classify the change between two half-period averages as "increasing",
/// "decreasing", or "stable" (within a ±5% band of relative change).
fn trend_label(first_half: f64, second_half: f64) -> String {
    // Nothing measured in either half: no direction to report.
    if first_half == 0.0 && second_half == 0.0 {
        return "stable".into();
    }
    // Relative change; the denominator is floored at 0.001 so a tiny or zero
    // first half cannot blow up the ratio.
    let pct = (second_half - first_half) / first_half.max(0.001);
    let label = if pct > 0.05 {
        "increasing"
    } else if pct < -0.05 {
        "decreasing"
    } else {
        "stable"
    };
    label.to_string()
}
142
143fn compute_quality_for_model(
144 conn: &rusqlite::Connection,
145 model: &str,
146 cutoff: &str,
147 total_turns: i64,
148) -> Option<QualityMetrics> {
149 let sql = format!(
150 "SELECT tf.grade, t.model, t.cost \
151 FROM turn_feedback tf \
152 JOIN turns t ON t.id = tf.turn_id \
153 WHERE t.model = ?1 AND tf.created_at >= {cutoff}"
154 );
155 let mut stmt = conn.prepare(&sql)
156 .inspect_err(|e| tracing::warn!(model, error = %e, "efficiency: failed to prepare quality metrics query"))
157 .ok()?;
158 let rows: Vec<(i32, String, f64)> = stmt
159 .query_map(rusqlite::params![model], |row| {
160 Ok((
161 row.get::<_, i32>(0)?,
162 row.get::<_, String>(1)?,
163 row.get::<_, f64>(2).unwrap_or(0.0),
164 ))
165 })
166 .inspect_err(|e| tracing::warn!(model, error = %e, "efficiency: failed to execute quality metrics query"))
167 .ok()?
168 .collect::<std::result::Result<Vec<_>, _>>()
169 .inspect_err(|e| tracing::warn!(model, error = %e, "efficiency: failed to collect quality metrics rows"))
170 .ok()?;
171
172 if rows.is_empty() {
173 return None;
174 }
175
176 let grade_count = rows.len() as i64;
177 let sum_grade: i64 = rows.iter().map(|(g, _, _)| *g as i64).sum();
178 let avg_grade = sum_grade as f64 / grade_count as f64;
179 let grade_coverage = if total_turns > 0 {
180 grade_count as f64 / total_turns as f64
181 } else {
182 0.0
183 };
184
185 let total_cost: f64 = rows.iter().map(|(_, _, c)| c).sum();
186 let total_quality: f64 = rows.iter().map(|(g, _, _)| *g as f64).sum();
187 let cost_per_quality_point = if total_quality > 0.0 {
188 total_cost / total_quality
189 } else {
190 0.0
191 };
192
193 let half = rows.len() / 2;
194 let trend = if rows.len() >= 4 {
195 let first_avg = rows[..half].iter().map(|(g, _, _)| *g as f64).sum::<f64>() / half as f64;
196 let second_avg = rows[half..].iter().map(|(g, _, _)| *g as f64).sum::<f64>()
197 / (rows.len() - half) as f64;
198 trend_label(first_avg, second_avg)
199 } else {
200 "stable".into()
201 };
202
203 Some(QualityMetrics {
204 avg_grade,
205 grade_count,
206 grade_coverage,
207 cost_per_quality_point,
208 by_complexity: HashMap::new(),
209 memory_impact: MemoryImpact {
210 with_memory: 0.0,
211 without_memory: 0.0,
212 },
213 trend,
214 })
215}
216
/// Build a per-model efficiency report over `period` ("1h", "24h", "7d",
/// "30d"; anything else means all history), optionally restricted to one
/// model via `model_filter`.
///
/// Aggregates `inference_costs` per model, derives a daily time series,
/// computes first-half/second-half trend labels, and attaches quality
/// metrics from `turn_feedback` when available.
///
/// # Errors
/// Returns an error when any of the SQL statements fails to prepare or run.
pub fn compute_efficiency(
    db: &Database,
    period: &str,
    model_filter: Option<&str>,
) -> Result<EfficiencyReport> {
    let cutoff = cutoff_expr(period);
    let conn = db.conn();

    // Optional model restriction; the value is bound as ?1 below when present.
    let model_clause = if model_filter.is_some() {
        " AND model = ?1"
    } else {
        ""
    };

    // Per-model aggregates over the period. NULLIF avoids division by zero
    // when tokens_in is 0 (AVG then skips the NULL row).
    let main_sql = format!(
        "SELECT \
         model, \
         COUNT(*) AS total_turns, \
         AVG(CAST(tokens_out AS REAL) / NULLIF(tokens_in, 0)) AS avg_output_density, \
         SUM(cost) AS total_cost, \
         SUM(tokens_out) AS total_tokens_out, \
         SUM(tokens_in) AS total_tokens_in, \
         SUM(CASE WHEN cached = 1 THEN 1 ELSE 0 END) AS cached_count, \
         AVG(cost) AS avg_cost_per_turn \
         FROM inference_costs \
         WHERE created_at >= {cutoff}{model_clause} \
         GROUP BY model \
         ORDER BY total_cost DESC"
    );

    let mut stmt = conn.prepare(&main_sql).db_err()?;

    // SQL aggregates can come back NULL; map those to zero defaults.
    let map_row = |row: &rusqlite::Row| -> rusqlite::Result<RawModelRow> {
        Ok(RawModelRow {
            model: row.get(0)?,
            total_turns: row.get(1)?,
            avg_output_density: row.get::<_, Option<f64>>(2)?.unwrap_or(0.0),
            total_cost: row.get::<_, Option<f64>>(3)?.unwrap_or(0.0),
            total_tokens_out: row.get::<_, Option<i64>>(4)?.unwrap_or(0),
            total_tokens_in: row.get::<_, Option<i64>>(5)?.unwrap_or(0),
            cached_count: row.get::<_, Option<i64>>(6)?.unwrap_or(0),
            avg_cost_per_turn: row.get::<_, Option<f64>>(7)?.unwrap_or(0.0),
        })
    };

    let rows: Vec<RawModelRow> = if let Some(mf) = model_filter {
        stmt.query_map(rusqlite::params![mf], map_row)
    } else {
        stmt.query_map([], map_row)
    }
    .db_err()?
    .collect::<std::result::Result<Vec<_>, _>>()
    .db_err()?;

    // Daily (bucket, model) aggregates with the same filters as above;
    // ordered by bucket so the trend halves below are chronological.
    let ts_sql = format!(
        "SELECT \
         strftime('%Y-%m-%d', created_at) AS bucket, \
         model, \
         AVG(CAST(tokens_out AS REAL) / NULLIF(tokens_in, 0)) AS output_density, \
         SUM(cost) AS cost, \
         COUNT(*) AS turns, \
         SUM(CASE WHEN cached = 1 THEN 1 ELSE 0 END) AS cached_count \
         FROM inference_costs \
         WHERE created_at >= {cutoff}{model_clause} \
         GROUP BY bucket, model \
         ORDER BY bucket"
    );

    let mut ts_stmt = conn.prepare(&ts_sql).db_err()?;

    let ts_map = |row: &rusqlite::Row| -> rusqlite::Result<TimeSeriesPoint> {
        Ok(TimeSeriesPoint {
            bucket: row.get(0)?,
            model: row.get(1)?,
            output_density: row.get::<_, Option<f64>>(2)?.unwrap_or(0.0),
            cost: row.get::<_, Option<f64>>(3)?.unwrap_or(0.0),
            turns: row.get(4)?,
            // Budget tracking is not available at this granularity yet.
            budget_utilization: 0.0,
            cached_count: row.get::<_, Option<i64>>(5)?.unwrap_or(0),
        })
    };

    let time_series: Vec<TimeSeriesPoint> = if let Some(mf) = model_filter {
        ts_stmt.query_map(rusqlite::params![mf], ts_map)
    } else {
        ts_stmt.query_map([], ts_map)
    }
    .db_err()?
    .collect::<std::result::Result<Vec<_>, _>>()
    .db_err()?;

    // Group time-series points per model for the trend computation below.
    let mut model_ts: HashMap<String, Vec<&TimeSeriesPoint>> = HashMap::new();
    for pt in &time_series {
        model_ts.entry(pt.model.clone()).or_default().push(pt);
    }

    let mut models: HashMap<String, ModelEfficiency> = HashMap::new();
    let mut grand_total_cost = 0.0_f64;
    let mut grand_total_turns = 0_i64;
    // (model name, total cost) / (model name, avg output density) trackers.
    let mut most_expensive: Option<(String, f64)> = None;
    let mut most_efficient: Option<(String, f64)> = None;

    for r in &rows {
        let cache_hit_rate = if r.total_turns > 0 {
            r.cached_count as f64 / r.total_turns as f64
        } else {
            0.0
        };

        let per_output_token = if r.total_tokens_out > 0 {
            r.total_cost / r.total_tokens_out as f64
        } else {
            0.0
        };

        // Input share of total tokens; neutral 0.5 when no tokens recorded.
        let input_fraction = if r.total_tokens_in + r.total_tokens_out > 0 {
            r.total_tokens_in as f64 / (r.total_tokens_in + r.total_tokens_out) as f64
        } else {
            0.5
        };
        // Heuristic: each cached turn is assumed to save roughly the
        // input-side share of an average turn's cost.
        let cache_savings = r.cached_count as f64 * r.avg_cost_per_turn * input_fraction;

        let pts = model_ts.get(&r.model).cloned().unwrap_or_default();
        // Trends compare the first half of the bucket-ordered series with
        // the second half; at least two points are needed for a direction.
        let trend = if pts.len() >= 2 {
            let mid = pts.len() / 2;
            let (first, second) = pts.split_at(mid);

            // Average of `f` over a slice of points (0.0 for an empty slice).
            let avg = |slice: &[&TimeSeriesPoint], f: fn(&TimeSeriesPoint) -> f64| -> f64 {
                if slice.is_empty() {
                    return 0.0;
                }
                slice.iter().map(|p| f(p)).sum::<f64>() / slice.len() as f64
            };

            let first_density = avg(first, |p| p.output_density);
            let second_density = avg(second, |p| p.output_density);

            let first_cpt = avg(first, |p| {
                if p.turns > 0 {
                    p.cost / p.turns as f64
                } else {
                    0.0
                }
            });
            let second_cpt = avg(second, |p| {
                if p.turns > 0 {
                    p.cost / p.turns as f64
                } else {
                    0.0
                }
            });

            let first_cache = avg(first, |p| {
                if p.turns > 0 {
                    p.cached_count as f64 / p.turns as f64
                } else {
                    0.0
                }
            });
            let second_cache = avg(second, |p| {
                if p.turns > 0 {
                    p.cached_count as f64 / p.turns as f64
                } else {
                    0.0
                }
            });

            TrendMetrics {
                output_density: trend_label(first_density, second_density),
                cost_per_turn: trend_label(first_cpt, second_cpt),
                cache_hit_rate: trend_label(first_cache, second_cache),
            }
        } else {
            TrendMetrics {
                output_density: "stable".into(),
                cost_per_turn: "stable".into(),
                cache_hit_rate: "stable".into(),
            }
        };

        let cumulative_trend = trend.cost_per_turn.clone();

        // Attribution is a placeholder: system-prompt and memory token counts
        // are not tracked here yet, so the full input share is booked under
        // "history".
        let attribution = CostAttribution {
            system_prompt: AttributionDetail {
                tokens: 0,
                cost: 0.0,
                pct: 0.0,
            },
            memories: AttributionDetail {
                tokens: 0,
                cost: 0.0,
                pct: 0.0,
            },
            history: AttributionDetail {
                tokens: r.total_tokens_in,
                cost: r.total_cost * input_fraction,
                pct: 100.0,
            },
        };

        // Best-effort; None when no graded feedback exists for the model.
        let quality = compute_quality_for_model(&conn, &r.model, cutoff, r.total_turns);

        let eff = ModelEfficiency {
            model: r.model.clone(),
            total_turns: r.total_turns,
            avg_output_density: r.avg_output_density,
            // Budget/memory/context instrumentation not wired up yet.
            avg_budget_utilization: 0.0,
            avg_memory_roi: 0.0,
            avg_system_prompt_weight: 0.0,
            cache_hit_rate,
            context_pressure_rate: 0.0,
            cost: CostMetrics {
                total: round6(r.total_cost),
                per_output_token: round6(per_output_token),
                effective_per_turn: round6(r.avg_cost_per_turn),
                cache_savings: round6(cache_savings),
                cumulative_trend,
                attribution,
                wasted_budget_cost: 0.0,
            },
            trend,
            quality,
        };

        grand_total_cost += r.total_cost;
        grand_total_turns += r.total_turns;

        // Track the model with the highest total spend...
        match &most_expensive {
            None => most_expensive = Some((r.model.clone(), r.total_cost)),
            Some((_, c)) if r.total_cost > *c => {
                most_expensive = Some((r.model.clone(), r.total_cost));
            }
            _ => {}
        }

        // ...and the one with the best average output density.
        let density = r.avg_output_density;
        match &most_efficient {
            None => most_efficient = Some((r.model.clone(), density)),
            Some((_, d)) if density > *d => {
                most_efficient = Some((r.model.clone(), density));
            }
            _ => {}
        }

        models.insert(r.model.clone(), eff);
    }

    let total_cache_savings: f64 = models.values().map(|m| m.cost.cache_savings).sum();

    let biggest_cost_driver = most_expensive
        .as_ref()
        .map(|(m, _)| m.clone())
        .unwrap_or_else(|| "none".into());

    let totals = EfficiencyTotals {
        total_cost: round6(grand_total_cost),
        total_cache_savings: round6(total_cache_savings),
        total_turns: grand_total_turns,
        most_expensive_model: most_expensive.map(|(m, _)| m),
        most_efficient_model: most_efficient.map(|(m, _)| m),
        biggest_cost_driver,
    };

    Ok(EfficiencyReport {
        period: period.to_string(),
        models,
        time_series,
        totals,
    })
}
495
/// Per-model usage stats feeding the recommendation profile.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecommendationModelStats {
    /// Inference calls for this model in the period.
    pub turns: i64,
    /// Average cost per turn.
    pub avg_cost: f64,
    /// NOTE(review): `build_user_profile` always leaves this `None`;
    /// presumably filled in elsewhere — confirm against callers.
    pub avg_quality: Option<f64>,
    /// Fraction of turns served from cache (0.0-1.0).
    pub cache_hit_rate: f64,
    /// Mean tokens_out / tokens_in ratio (0.0 when unknown).
    pub avg_output_density: f64,
}
506
/// Aggregated usage profile over a period, built by `build_user_profile`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecommendationUserProfile {
    pub total_sessions: i64,
    pub total_turns: i64,
    pub total_cost: f64,
    /// Mean grade across graded turns; `None` when nothing was graded.
    pub avg_quality: Option<f64>,
    /// Fraction of turns that received a grade (0.0-1.0).
    pub grade_coverage: f64,
    /// Model names ordered by usage (most turns first).
    pub models_used: Vec<String>,
    pub model_stats: HashMap<String, RecommendationModelStats>,
    /// Average messages per session.
    pub avg_session_length: f64,
    /// Average tokens_in + tokens_out per turn.
    pub avg_tokens_per_turn: f64,
    /// Fraction of tool calls with status 'success'; 1.0 when no tool calls.
    pub tool_success_rate: f64,
    pub cache_hit_rate: f64,
    /// Fraction of context snapshots containing memory tokens; defaults to
    /// a neutral 0.5 when no snapshots exist or the query fails.
    pub memory_retrieval_rate: f64,
}
522
/// Aggregate a user's usage profile over `period` for recommendation logic.
///
/// Collects session counts, overall and per-model cost/caching stats, tool
/// success rate, grade coverage, and memory retrieval rate. The quality and
/// memory queries are deliberately best-effort: failures fall back to neutral
/// defaults instead of propagating an error.
///
/// # Errors
/// Returns an error when the session, cost, or tool-call queries fail.
pub fn build_user_profile(db: &Database, period: &str) -> Result<RecommendationUserProfile> {
    let cutoff = cutoff_expr(period);
    let conn = db.conn();

    // Sessions in the window and their average message count (LEFT JOIN so
    // empty sessions still count with msg_count = 0).
    let (total_sessions, avg_session_length): (i64, f64) = conn
        .query_row(
            &format!(
                "SELECT COUNT(*), COALESCE(AVG(msg_count), 0) FROM (\
                 SELECT s.id, COUNT(m.id) AS msg_count \
                 FROM sessions s \
                 LEFT JOIN session_messages m ON m.session_id = s.id \
                 WHERE s.created_at >= {cutoff} \
                 GROUP BY s.id\
                 )"
            ),
            [],
            |row| Ok((row.get(0)?, row.get(1)?)),
        )
        .db_err()?;

    // Overall turn volume, spend, token usage, and cache hit rate.
    let (total_turns, total_cost, avg_tokens_per_turn, cache_hit_rate): (i64, f64, f64, f64) = conn
        .query_row(
            &format!(
                "SELECT \
                 COUNT(*), \
                 COALESCE(SUM(cost), 0), \
                 COALESCE(AVG(tokens_in + tokens_out), 0), \
                 CASE WHEN COUNT(*) > 0 \
                 THEN CAST(SUM(CASE WHEN cached = 1 THEN 1 ELSE 0 END) AS REAL) / COUNT(*) \
                 ELSE 0.0 END \
                 FROM inference_costs \
                 WHERE created_at >= {cutoff}"
            ),
            [],
            |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)),
        )
        .db_err()?;

    // Per-model usage stats, most-used model first.
    let mut model_stmt = conn
        .prepare(&format!(
            "SELECT \
             model, \
             COUNT(*) AS turns, \
             AVG(cost) AS avg_cost, \
             CASE WHEN COUNT(*) > 0 \
             THEN CAST(SUM(CASE WHEN cached = 1 THEN 1 ELSE 0 END) AS REAL) / COUNT(*) \
             ELSE 0.0 END AS cache_rate, \
             AVG(CAST(tokens_out AS REAL) / NULLIF(tokens_in, 0)) AS avg_density \
             FROM inference_costs \
             WHERE created_at >= {cutoff} \
             GROUP BY model \
             ORDER BY turns DESC"
        ))
        .db_err()?;

    let mut models_used = Vec::new();
    let mut model_stats = HashMap::new();

    let rows = model_stmt
        .query_map([], |row| {
            Ok((
                row.get::<_, String>(0)?,
                row.get::<_, i64>(1)?,
                row.get::<_, f64>(2)?,
                row.get::<_, f64>(3)?,
                // avg_density is NULL when every row had tokens_in = 0.
                row.get::<_, Option<f64>>(4)?,
            ))
        })
        .db_err()?;

    for row in rows {
        let (model, turns, avg_cost, cache_rate, avg_density) = row.db_err()?;
        models_used.push(model.clone());
        model_stats.insert(
            model,
            RecommendationModelStats {
                turns,
                avg_cost,
                // Per-model quality is not computed here.
                avg_quality: None,
                cache_hit_rate: cache_rate,
                avg_output_density: avg_density.unwrap_or(0.0),
            },
        );
    }

    // Defaults to 1.0 (perfect) when no tool calls exist in the window.
    let tool_success_rate: f64 = conn
        .query_row(
            &format!(
                "SELECT CASE WHEN COUNT(*) > 0 \
                 THEN CAST(SUM(CASE WHEN status = 'success' THEN 1 ELSE 0 END) AS REAL) / COUNT(*) \
                 ELSE 1.0 END \
                 FROM tool_calls WHERE created_at >= {cutoff}"
            ),
            [],
            |row| row.get(0),
        )
        .db_err()?;

    // Best-effort: a failing query is treated as "no grades" rather than an
    // error (unwrap_or below).
    let (graded_turns, avg_quality): (i64, Option<f64>) = conn
        .query_row(
            &format!(
                "SELECT COUNT(*), AVG(CAST(tf.grade AS REAL)) \
                 FROM turn_feedback tf \
                 JOIN turns t ON t.id = tf.turn_id \
                 JOIN sessions s ON s.id = t.session_id \
                 WHERE s.created_at >= {cutoff}"
            ),
            [],
            |row| Ok((row.get(0)?, row.get(1)?)),
        )
        .unwrap_or((0, None));

    let grade_coverage = if total_turns > 0 {
        graded_turns as f64 / total_turns as f64
    } else {
        0.0
    };

    // Share of context snapshots that carried memory tokens; a neutral 0.5
    // when there are no snapshots or the query fails.
    let memory_retrieval_rate: f64 = conn
        .query_row(
            &format!(
                "SELECT CASE WHEN COUNT(*) > 0 \
                 THEN CAST(SUM(CASE WHEN memory_tokens > 0 THEN 1 ELSE 0 END) AS REAL) / COUNT(*) \
                 ELSE 0.5 END \
                 FROM context_snapshots WHERE created_at >= {cutoff}"
            ),
            [],
            |row| row.get(0),
        )
        .unwrap_or(0.5);

    Ok(RecommendationUserProfile {
        total_sessions,
        total_turns,
        total_cost,
        avg_quality,
        grade_coverage,
        models_used,
        model_stats,
        avg_session_length,
        avg_tokens_per_turn,
        tool_success_rate,
        cache_hit_rate,
        memory_retrieval_rate,
    })
}
671
#[cfg(test)]
mod tests {
    //! Integration-style tests running report construction against a fresh
    //! in-memory SQLite database per test.

    use super::*;
    use crate::metrics::record_inference_cost;

    /// Fresh in-memory database for each test.
    fn test_db() -> Database {
        Database::new(":memory:").unwrap()
    }

    #[test]
    fn empty_database_returns_empty_report() {
        let db = test_db();
        let report = compute_efficiency(&db, "7d", None).unwrap();
        assert!(report.models.is_empty());
        assert!(report.time_series.is_empty());
        assert_eq!(report.totals.total_turns, 0);
        assert_eq!(report.totals.total_cost, 0.0);
    }

    #[test]
    fn single_model_report() {
        let db = test_db();
        // Two turns for the same model: one uncached, one cached.
        record_inference_cost(
            &db,
            "claude-4",
            "anthropic",
            1000,
            500,
            0.015,
            Some("T1"),
            false,
            None,
            None,
            false,
            None,
        )
        .unwrap();
        record_inference_cost(
            &db,
            "claude-4",
            "anthropic",
            2000,
            800,
            0.025,
            Some("T1"),
            true,
            None,
            None,
            false,
            None,
        )
        .unwrap();

        let report = compute_efficiency(&db, "all", None).unwrap();
        assert_eq!(report.models.len(), 1);
        let m = &report.models["claude-4"];
        assert_eq!(m.total_turns, 2);
        assert!(m.avg_output_density > 0.0);
        assert!((m.cost.total - 0.04).abs() < 1e-9);
        assert_eq!(m.cache_hit_rate, 0.5);
    }

    #[test]
    fn model_filter_works() {
        let db = test_db();
        record_inference_cost(
            &db,
            "claude-4",
            "anthropic",
            100,
            50,
            0.01,
            None,
            false,
            None,
            None,
            false,
            None,
        )
        .unwrap();
        record_inference_cost(
            &db, "gpt-4", "openai", 200, 100, 0.02, None, false, None, None, false, None,
        )
        .unwrap();

        // Only the filtered model should appear in the report.
        let report = compute_efficiency(&db, "all", Some("gpt-4")).unwrap();
        assert_eq!(report.models.len(), 1);
        assert!(report.models.contains_key("gpt-4"));
    }

    #[test]
    fn multiple_models_totals() {
        let db = test_db();
        record_inference_cost(
            &db,
            "claude-4",
            "anthropic",
            100,
            50,
            0.01,
            None,
            false,
            None,
            None,
            false,
            None,
        )
        .unwrap();
        record_inference_cost(
            &db, "gpt-4", "openai", 200, 100, 0.02, None, false, None, None, false, None,
        )
        .unwrap();

        let report = compute_efficiency(&db, "all", None).unwrap();
        assert_eq!(report.totals.total_turns, 2);
        assert!((report.totals.total_cost - 0.03).abs() < 1e-9);
        assert_eq!(report.totals.most_expensive_model.as_deref(), Some("gpt-4"));
    }

    #[test]
    fn time_series_has_entries() {
        let db = test_db();
        record_inference_cost(
            &db,
            "claude-4",
            "anthropic",
            100,
            50,
            0.01,
            None,
            false,
            None,
            None,
            false,
            None,
        )
        .unwrap();

        let report = compute_efficiency(&db, "all", None).unwrap();
        assert!(!report.time_series.is_empty());
        assert_eq!(report.time_series[0].model, "claude-4");
    }

    #[test]
    fn trend_label_logic() {
        assert_eq!(trend_label(1.0, 1.5), "increasing");
        assert_eq!(trend_label(1.0, 0.5), "decreasing");
        assert_eq!(trend_label(1.0, 1.02), "stable");
        assert_eq!(trend_label(0.0, 0.0), "stable");
    }

    #[test]
    fn all_cached_full_rate() {
        let db = test_db();
        record_inference_cost(
            &db, "m1", "p1", 100, 50, 0.01, None, true, None, None, false, None,
        )
        .unwrap();
        record_inference_cost(
            &db, "m1", "p1", 100, 50, 0.01, None, true, None, None, false, None,
        )
        .unwrap();

        let report = compute_efficiency(&db, "all", None).unwrap();
        assert_eq!(report.models["m1"].cache_hit_rate, 1.0);
    }

    #[test]
    fn period_all_vs_default() {
        // "all" and unknown periods both fall back to the epoch cutoff.
        assert_eq!(cutoff_expr("all"), "datetime('1970-01-01')");
        assert_eq!(cutoff_expr("7d"), "datetime('now', '-7 days')");
        assert_eq!(cutoff_expr("unknown"), "datetime('1970-01-01')");
    }

    #[test]
    fn zero_tokens_in_no_division_by_zero() {
        let db = test_db();
        // tokens_in = 0 must not produce NaN/inf (NULLIF in the SQL).
        record_inference_cost(
            &db, "m1", "p1", 0, 50, 0.01, None, false, None, None, false, None,
        )
        .unwrap();

        let report = compute_efficiency(&db, "all", None).unwrap();
        let m = &report.models["m1"];
        assert!(m.avg_output_density.is_finite());
    }

    #[test]
    fn cutoff_expr_1h() {
        assert_eq!(cutoff_expr("1h"), "datetime('now', '-1 hour')");
    }

    #[test]
    fn cutoff_expr_24h() {
        assert_eq!(cutoff_expr("24h"), "datetime('now', '-1 day')");
    }

    #[test]
    fn cutoff_expr_30d() {
        assert_eq!(cutoff_expr("30d"), "datetime('now', '-30 days')");
    }

    #[test]
    fn trend_label_edge_cases() {
        // Boundaries around the ±5% stability band.
        assert_eq!(trend_label(1.0, 1.03), "stable");
        assert_eq!(trend_label(1.0, 1.10), "increasing");
        assert_eq!(trend_label(1.0, 0.97), "stable");
        assert_eq!(trend_label(1.0, 0.90), "decreasing");
        // Tiny first half is floored at 0.001 to avoid ratio blow-up.
        assert_eq!(trend_label(0.001, 0.5), "increasing");
        assert_eq!(trend_label(0.0001, 0.0001), "stable");
    }

    #[test]
    fn zero_tokens_out_no_division_by_zero() {
        let db = test_db();
        record_inference_cost(
            &db, "m1", "p1", 1000, 0, 0.01, None, false, None, None, false, None,
        )
        .unwrap();

        let report = compute_efficiency(&db, "all", None).unwrap();
        let m = &report.models["m1"];
        // per_output_token falls back to 0.0 when no output tokens exist.
        assert_eq!(m.cost.per_output_token, 0.0);
        assert!(m.cost.total.is_finite());
    }

    #[test]
    fn no_cached_zero_rate() {
        let db = test_db();
        record_inference_cost(
            &db, "m1", "p1", 100, 50, 0.01, None, false, None, None, false, None,
        )
        .unwrap();
        record_inference_cost(
            &db, "m1", "p1", 100, 50, 0.01, None, false, None, None, false, None,
        )
        .unwrap();

        let report = compute_efficiency(&db, "all", None).unwrap();
        assert_eq!(report.models["m1"].cache_hit_rate, 0.0);
    }

    #[test]
    fn multiple_models_identifies_most_efficient() {
        let db = test_db();
        // m1: density 0.5; m2: density 2.0 — m2 should win.
        record_inference_cost(
            &db, "m1", "p1", 1000, 500, 0.01, None, false, None, None, false, None,
        )
        .unwrap();
        record_inference_cost(
            &db, "m2", "p2", 100, 200, 0.005, None, false, None, None, false, None,
        )
        .unwrap();

        let report = compute_efficiency(&db, "all", None).unwrap();
        assert_eq!(
            report.totals.most_efficient_model.as_deref(),
            Some("m2"),
            "m2 has higher output density"
        );
    }

    #[test]
    fn trend_metrics_with_time_series() {
        let db = test_db();
        let conn = db.conn();
        // Insert six turns on six distinct days so trends can be computed.
        for i in 0..6 {
            let day = format!("2025-01-{:02}T12:00:00", i + 1);
            conn.execute(
                "INSERT INTO inference_costs (id, model, provider, tokens_in, tokens_out, cost, cached, created_at) \
                 VALUES (?1, 'claude-4', 'anthropic', ?2, ?3, ?4, 0, ?5)",
                rusqlite::params![
                    format!("ic-{i}"),
                    1000 + i * 100,
                    500 + i * 50,
                    0.01 + i as f64 * 0.005,
                    day,
                ],
            )
            .unwrap();
        }
        drop(conn);

        let report = compute_efficiency(&db, "all", None).unwrap();
        let m = &report.models["claude-4"];
        assert!(report.time_series.len() >= 2);
        assert!(!m.trend.output_density.is_empty());
        assert!(!m.trend.cost_per_turn.is_empty());
        assert!(!m.trend.cache_hit_rate.is_empty());
    }

    #[test]
    fn build_user_profile_empty_db() {
        let db = test_db();
        let profile = build_user_profile(&db, "7d").unwrap();
        assert_eq!(profile.total_sessions, 0);
        assert_eq!(profile.total_turns, 0);
        assert_eq!(profile.total_cost, 0.0);
        assert!(profile.models_used.is_empty());
        assert!(profile.model_stats.is_empty());
        assert_eq!(profile.avg_session_length, 0.0);
        assert_eq!(profile.avg_tokens_per_turn, 0.0);
        // No tool calls: success rate defaults to perfect.
        assert_eq!(profile.tool_success_rate, 1.0);
    }

    #[test]
    fn build_user_profile_with_data() {
        let db = test_db();
        let conn = db.conn();
        // One session with two messages.
        conn.execute(
            "INSERT INTO sessions (id, agent_id, scope_key, status) VALUES ('s1', 'agent-1', 'agent', 'active')",
            [],
        )
        .unwrap();
        conn.execute(
            "INSERT INTO session_messages (id, session_id, role, content) VALUES ('m1', 's1', 'user', 'hello')",
            [],
        )
        .unwrap();
        conn.execute(
            "INSERT INTO session_messages (id, session_id, role, content) VALUES ('m2', 's1', 'assistant', 'hi')",
            [],
        )
        .unwrap();
        drop(conn);

        // Three turns across two models (one cached).
        record_inference_cost(
            &db,
            "claude-4",
            "anthropic",
            1000,
            500,
            0.015,
            Some("T1"),
            false,
            None,
            None,
            false,
            None,
        )
        .unwrap();
        record_inference_cost(
            &db,
            "claude-4",
            "anthropic",
            2000,
            800,
            0.025,
            Some("T1"),
            true,
            None,
            None,
            false,
            None,
        )
        .unwrap();
        record_inference_cost(
            &db, "gpt-4", "openai", 500, 200, 0.01, None, false, None, None, false, None,
        )
        .unwrap();

        // One successful and one failed tool call → 0.5 success rate.
        {
            let conn = db.conn();
            conn.execute("INSERT INTO turns (id, session_id) VALUES ('t1', 's1')", [])
                .unwrap();
            conn.execute(
                "INSERT INTO tool_calls (id, turn_id, tool_name, input, status) VALUES ('tc1', 't1', 'bash', '{}', 'success')",
                [],
            )
            .unwrap();
            conn.execute(
                "INSERT INTO tool_calls (id, turn_id, tool_name, input, status) VALUES ('tc2', 't1', 'bash', '{}', 'error')",
                [],
            )
            .unwrap();
        }

        let profile = build_user_profile(&db, "all").unwrap();
        assert_eq!(profile.total_sessions, 1);
        assert_eq!(profile.total_turns, 3);
        assert!((profile.total_cost - 0.05).abs() < 1e-9);
        assert!(profile.models_used.contains(&"claude-4".to_string()));
        assert!(profile.models_used.contains(&"gpt-4".to_string()));
        assert_eq!(profile.model_stats.len(), 2);
        assert_eq!(profile.model_stats["claude-4"].turns, 2);
        assert_eq!(profile.model_stats["gpt-4"].turns, 1);
        assert_eq!(profile.avg_session_length, 2.0);
        assert!(profile.avg_tokens_per_turn > 0.0);
        assert_eq!(profile.tool_success_rate, 0.5);
        assert!(profile.cache_hit_rate > 0.0);
    }

    #[test]
    fn build_user_profile_grade_coverage() {
        let db = test_db();
        let conn = db.conn();
        conn.execute(
            "INSERT INTO sessions (id, agent_id, scope_key, status) VALUES ('s1', 'agent-1', 'agent', 'active')",
            [],
        )
        .unwrap();
        conn.execute("INSERT INTO turns (id, session_id) VALUES ('t1', 's1')", [])
            .unwrap();
        conn.execute(
            "INSERT INTO turn_feedback (id, turn_id, session_id, grade, source) VALUES ('tf1', 't1', 's1', 4, 'dashboard')",
            [],
        )
        .unwrap();
        drop(conn);

        record_inference_cost(
            &db, "m1", "p1", 100, 50, 0.01, None, false, None, None, false, None,
        )
        .unwrap();
        record_inference_cost(
            &db, "m1", "p1", 100, 50, 0.01, None, false, None, None, false, None,
        )
        .unwrap();

        let profile = build_user_profile(&db, "all").unwrap();
        assert!(profile.avg_quality.is_some());
        assert!((profile.avg_quality.unwrap() - 4.0).abs() < 1e-9);
        assert!(profile.grade_coverage > 0.0);
        assert!(profile.grade_coverage <= 1.0);
    }

    #[test]
    fn compute_quality_for_model_with_feedback() {
        let db = test_db();
        let conn = db.conn();
        conn.execute(
            "INSERT INTO sessions (id, agent_id, scope_key, status) VALUES ('s1', 'a1', 'agent', 'active')",
            [],
        )
        .unwrap();
        // Four graded turns with grades 3, 4, 5, 5.
        conn.execute(
            "INSERT INTO turns (id, session_id, model, cost) VALUES ('t1', 's1', 'claude-4', 0.01)",
            [],
        )
        .unwrap();
        conn.execute(
            "INSERT INTO turns (id, session_id, model, cost) VALUES ('t2', 's1', 'claude-4', 0.02)",
            [],
        )
        .unwrap();
        conn.execute(
            "INSERT INTO turns (id, session_id, model, cost) VALUES ('t3', 's1', 'claude-4', 0.015)",
            [],
        )
        .unwrap();
        conn.execute(
            "INSERT INTO turns (id, session_id, model, cost) VALUES ('t4', 's1', 'claude-4', 0.025)",
            [],
        )
        .unwrap();
        conn.execute(
            "INSERT INTO turn_feedback (id, turn_id, session_id, grade) VALUES ('f1', 't1', 's1', 3)",
            [],
        )
        .unwrap();
        conn.execute(
            "INSERT INTO turn_feedback (id, turn_id, session_id, grade) VALUES ('f2', 't2', 's1', 4)",
            [],
        )
        .unwrap();
        conn.execute(
            "INSERT INTO turn_feedback (id, turn_id, session_id, grade) VALUES ('f3', 't3', 's1', 5)",
            [],
        )
        .unwrap();
        conn.execute(
            "INSERT INTO turn_feedback (id, turn_id, session_id, grade) VALUES ('f4', 't4', 's1', 5)",
            [],
        )
        .unwrap();

        let quality = compute_quality_for_model(&conn, "claude-4", "datetime('1970-01-01')", 4);
        drop(conn);

        assert!(quality.is_some());
        let q = quality.unwrap();
        assert_eq!(q.grade_count, 4);
        assert!((q.avg_grade - 4.25).abs() < 1e-9);
        assert_eq!(q.grade_coverage, 1.0);
        assert!(q.cost_per_quality_point > 0.0);
        // First half [3, 4] vs second half [5, 5] → increasing.
        assert_eq!(q.trend, "increasing");
    }

    #[test]
    fn compute_quality_for_model_no_feedback() {
        let db = test_db();
        let conn = db.conn();
        let quality = compute_quality_for_model(&conn, "claude-4", "datetime('1970-01-01')", 10);
        drop(conn);
        assert!(quality.is_none());
    }

    #[test]
    fn compute_quality_few_entries_stable_trend() {
        let db = test_db();
        let conn = db.conn();
        conn.execute(
            "INSERT INTO sessions (id, agent_id, scope_key, status) VALUES ('s1', 'a1', 'agent', 'active')",
            [],
        )
        .unwrap();
        conn.execute(
            "INSERT INTO turns (id, session_id, model, cost) VALUES ('t1', 's1', 'claude-4', 0.01)",
            [],
        )
        .unwrap();
        conn.execute(
            "INSERT INTO turn_feedback (id, turn_id, session_id, grade) VALUES ('f1', 't1', 's1', 4)",
            [],
        )
        .unwrap();

        let quality = compute_quality_for_model(&conn, "claude-4", "datetime('1970-01-01')", 1);
        drop(conn);

        assert!(quality.is_some());
        let q = quality.unwrap();
        // Fewer than 4 graded turns → trend defaults to "stable".
        assert_eq!(q.trend, "stable");
    }

    #[test]
    fn report_cost_attribution_all_history() {
        let db = test_db();
        record_inference_cost(
            &db, "m1", "p1", 1000, 500, 0.03, None, false, None, None, false, None,
        )
        .unwrap();

        let report = compute_efficiency(&db, "all", None).unwrap();
        let m = &report.models["m1"];
        // Attribution placeholder: everything booked under "history".
        assert_eq!(m.cost.attribution.history.pct, 100.0);
        assert!(m.cost.attribution.history.tokens > 0);
        assert_eq!(m.cost.attribution.system_prompt.tokens, 0);
        assert_eq!(m.cost.attribution.memories.tokens, 0);
    }

    #[test]
    fn report_biggest_cost_driver_with_no_models() {
        let db = test_db();
        let report = compute_efficiency(&db, "all", None).unwrap();
        assert_eq!(report.totals.biggest_cost_driver, "none");
        assert!(report.totals.most_expensive_model.is_none());
        assert!(report.totals.most_efficient_model.is_none());
    }

    #[test]
    fn build_user_profile_memory_retrieval_default() {
        let db = test_db();
        let profile = build_user_profile(&db, "all").unwrap();
        // No context snapshots → neutral 0.5 default.
        assert!((profile.memory_retrieval_rate - 0.5).abs() < 1e-9);
    }
}
1249}