// cortex-rs-stats 0.2.0

// Routing savings calculator.
//
// Each row in route_decisions records the model the router chose and the
// model that would have been used as the unrouted baseline. We estimate
// per-task cost using a fixed typical-task token assumption — the absolute
// number is rough, but the *delta* between chosen vs baseline is honest as
// long as both models are priced and the assumed mix doesn't change.
//
// Assumed typical Claude Code task: 5K input tokens + 2K output tokens.
// Rationale: this approximates the average turn in a real session of
// Read/Edit/Bash/MCP-call turns, where long-context turns are rarer than
// short ones.

use anyhow::Result;
use cortex_rs_core::CortexConfig;
use rusqlite::Connection;

/// Input tokens assumed for one typical task when estimating per-task cost.
const ASSUMED_INPUT_TOKENS: f64 = 5_000_f64;
/// Output tokens assumed for one typical task when estimating per-task cost.
const ASSUMED_OUTPUT_TOKENS: f64 = 2_000_f64;

/// Estimates the total cost saved by routing over the last `days` days.
///
/// Rows of `route_decisions` with `decided_at >= now - days * 86_400` are
/// grouped by `(chosen_model, baseline_model)`. For each pair the per-task
/// cost of both models is estimated with `task_cost`, and
/// `(baseline - chosen) * row_count` is added to the total when the chosen
/// model is cheaper. Pairs where routing was not cheaper contribute nothing;
/// they never reduce the total.
///
/// Pairs in which either model has no entry in `config.pricing` are skipped.
/// `task_cost` prices an unknown model at `0.0`, so an unpriced chosen model
/// would otherwise be reported as saving the baseline's entire cost.
///
/// The result is in whatever currency unit `config.pricing` uses.
///
/// # Errors
///
/// Returns an error if the query cannot be prepared or executed, or if a row
/// cannot be decoded as `(String, String, i64)`.
pub fn compute(conn: &Connection, config: &CortexConfig, days: u32) -> Result<f64> {
    // NOTE(review): assumes `decided_at` stores Unix seconds (UTC), since it is
    // compared against `Utc::now().timestamp()` — confirm against the schema.
    let cutoff = chrono::Utc::now().timestamp() - i64::from(days) * 86_400;

    let mut stmt = conn.prepare(
        "SELECT chosen_model, baseline_model, COUNT(*)
         FROM route_decisions
         WHERE decided_at >= ?1
         GROUP BY chosen_model, baseline_model",
    )?;

    let rows = stmt.query_map([cutoff], |r| {
        Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?, r.get::<_, i64>(2)?))
    })?;

    let mut total_savings = 0.0_f64;
    for row in rows {
        let (chosen, baseline, count) = row?;

        // The delta is only meaningful when both sides are priced; a missing
        // price would be treated as "free" and distort the estimate.
        let both_priced = config.pricing.contains_key(chosen.as_str())
            && config.pricing.contains_key(baseline.as_str());
        if !both_priced {
            continue;
        }

        let per_task_saving = task_cost(&baseline, config) - task_cost(&chosen, config);
        if per_task_saving > 0.0 {
            total_savings += per_task_saving * count as f64;
        }
    }

    Ok(total_savings)
}

/// Estimated cost of one assumed-typical task on `model`.
///
/// The assumed input and output token counts are each converted to millions
/// of tokens and multiplied by the model's `input` and `output` rates from
/// `config.pricing`. A model with no pricing entry yields `0.0`.
fn task_cost(model: &str, config: &CortexConfig) -> f64 {
    let input_millions = ASSUMED_INPUT_TOKENS / 1_000_000.0;
    let output_millions = ASSUMED_OUTPUT_TOKENS / 1_000_000.0;

    match config.pricing.get(model) {
        Some(rates) => input_millions * rates.input + output_millions * rates.output,
        None => 0.0,
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use cortex_rs_core::{
        config::{
            DaemonConfig, McpConfig, McpTransport, MemoryConfig, ModelPricing, ProvidersConfig,
            RoleConfig, RoutingConfig, RoutingStrategy,
        },
        Db,
    };
    use std::collections::HashMap;

    /// Price entry with the given input/output rates and no cache pricing.
    fn price(input: f64, output: f64) -> ModelPricing {
        ModelPricing {
            input,
            output,
            cache_creation: None,
            cache_read: None,
        }
    }

    /// Minimal config that prices exactly two models, "opus" and "haiku".
    /// Every other field is filler that `compute` does not read.
    fn test_config() -> CortexConfig {
        let pricing = HashMap::from([
            ("opus".into(), price(15.0, 75.0)),
            ("haiku".into(), price(0.8, 4.0)),
        ]);

        let daemon = DaemonConfig {
            db: "x".into(),
            notes_dir: "x".into(),
            socket: "x".into(),
        };
        let routing = RoutingConfig {
            strategy: RoutingStrategy::RuleBased,
            fallback: "x".into(),
            baseline: Some("architect".into()),
        };
        let memory = MemoryConfig {
            hot_limit: 20,
            cold_threshold_days: 90,
            cold_confidence_max: 0.4,
            decay_lambda: 0.02,
        };

        CortexConfig {
            daemon,
            mcp: McpConfig { transport: McpTransport::Stdio },
            providers: ProvidersConfig::default(),
            pricing,
            roles: HashMap::new(),
            routing,
            memory,
        }
    }

    /// With no recorded decisions the estimate is exactly zero.
    #[test]
    fn savings_zero_when_no_decisions() {
        let db = Db::open_in_memory().unwrap();
        let config = test_config();
        let savings = compute(&db.conn, &config, 7).unwrap();
        assert_eq!(savings, 0.0);
    }

    /// One decision routed from opus (baseline) to haiku (chosen) saves the
    /// difference between the two per-task estimates.
    #[test]
    fn savings_positive_when_routing_to_cheaper() {
        let db = Db::open_in_memory().unwrap();
        db.conn
            .execute(
                "INSERT INTO route_decisions (task_excerpt, role, chosen_model, baseline_model)
                 VALUES ('a', 'docs', 'haiku', 'opus')",
                [],
            )
            .unwrap();

        let config = test_config();
        let savings = compute(&db.conn, &config, 7).unwrap();

        // opus task:  (5_000 * 15  + 2_000 * 75) / 1e6 = $0.225
        // haiku task: (5_000 * 0.8 + 2_000 * 4)  / 1e6 = $0.012
        // expected saving for the single task ≈ $0.213
        assert!(savings > 0.20 && savings < 0.25, "expected ~$0.213, got ${}", savings);
    }
}