collet 0.1.0

Relentless agentic coding orchestrator with zero-drop agent loops
Documentation
//! Model-specific metrics aggregation from bench.jsonl.
//!
//! Reads the existing benchmark log and groups entries by model name,
//! computing per-model statistics used by the analyzer to generate
//! parameter optimization suggestions.

use std::collections::HashMap;
use std::path::Path;

use crate::bench::BenchEntry;

/// Aggregated statistics for a single model.
///
/// Built by [`compute_model_stats`]. Every distribution field is a
/// min / p50 / p95 / max / mean summary ([`Percentiles`] for integer samples,
/// [`PercentilesF`] for floating-point samples) with one sample per session.
#[derive(Debug, Clone)]
pub struct ModelStats {
    /// Model name (e.g. "glm-4.7").
    pub model: String,
    /// Number of sessions summarized. Cancelled sessions are excluded only when the
    /// entries come from [`group_by_model`]; `compute_model_stats` itself does not filter.
    pub session_count: usize,
    /// Iterations per task (min / p50 / p95 / max / mean).
    pub iterations: Percentiles,
    /// Task duration in seconds (min / p50 / p95 / max / mean).
    pub duration_secs: Percentiles,
    /// Tool calls per task (min / p50 / p95 / max / mean).
    pub tool_calls: Percentiles,
    /// Tool success rate, 0–100 (min / p50 / p95 / max / mean).
    pub tool_success_rate: PercentilesF,
    /// Input tokens per task (min / p50 / p95 / max / mean).
    pub tokens_in: Percentiles,
    /// Output tokens per task (min / p50 / p95 / max / mean).
    pub tokens_out: Percentiles,
    /// Context usage % at task end (min / p50 / p95 / max / mean).
    pub ctx_pct: Percentiles,
    /// Compaction count per task (min / p50 / p95 / max / mean).
    pub compactions: Percentiles,
    /// Average tool latency in ms (min / p50 / p95 / max / mean).
    pub tool_latency_avg: PercentilesF,
    /// Average API latency in ms (min / p50 / p95 / max / mean).
    pub api_latency_avg: PercentilesF,
    /// Cache hit rate, 0–100 (min / p50 / p95 / max / mean).
    pub cache_pct: Percentiles,
    /// Stream retry events observed. `compute_model_stats` currently always sets
    /// this to 0 because bench.jsonl does not record stream retries.
    pub stream_retry_sessions: usize,
}

/// Integer percentile summary.
///
/// Produced by `percentiles_u64` from a sorted copy of the samples; every field
/// is zero when there are no samples.
#[derive(Debug, Clone)]
pub struct Percentiles {
    /// Smallest sample.
    pub min: u64,
    /// Sample at sorted index `n / 2` (the upper median when `n` is even).
    pub p50: u64,
    /// Sample at sorted index `floor(n * 0.95)`.
    pub p95: u64,
    /// Largest sample.
    pub max: u64,
    /// Arithmetic mean of all samples.
    pub mean: f64,
}

/// Float percentile summary.
///
/// Produced by `percentiles_f64` from a sorted copy of the samples; every field
/// is zero when there are no samples.
#[derive(Debug, Clone)]
pub struct PercentilesF {
    /// Smallest sample.
    pub min: f64,
    /// Sample at sorted index `n / 2` (the upper median when `n` is even).
    pub p50: f64,
    /// Sample at sorted index `floor(n * 0.95)`.
    pub p95: f64,
    /// Largest sample.
    pub max: f64,
    /// Arithmetic mean of all samples.
    pub mean: f64,
}

/// Minimum sessions required before generating optimization suggestions.
///
/// [`compute_model_stats`] returns `None` for any model with fewer entries than this.
pub const MIN_SESSIONS: usize = 5;

/// Read every parseable [`BenchEntry`] from the JSONL file at `path`.
///
/// This is a best-effort loader: if the file cannot be read, an empty vector is
/// returned, and any line that does not deserialize as a `BenchEntry` is skipped
/// without reporting an error.
pub fn load_bench_entries(path: &Path) -> Vec<BenchEntry> {
    let text = match std::fs::read_to_string(path) {
        Ok(text) => text,
        Err(_) => return Vec::new(),
    };

    let mut entries = Vec::new();
    for raw in text.lines() {
        // Unparseable lines are dropped rather than failing the whole load.
        if let Ok(entry) = serde_json::from_str::<BenchEntry>(raw) {
            entries.push(entry);
        }
    }
    entries
}

/// Bucket bench entries by their model name.
///
/// Entries flagged as `cancelled` are left out. Each bucket borrows from
/// `entries` and keeps the entries in their original relative order.
pub fn group_by_model(entries: &[BenchEntry]) -> HashMap<String, Vec<&BenchEntry>> {
    entries
        .iter()
        .filter(|entry| !entry.cancelled)
        .fold(
            HashMap::<String, Vec<&BenchEntry>>::new(),
            |mut by_model, entry| {
                by_model.entry(entry.model.clone()).or_default().push(entry);
                by_model
            },
        )
}

/// Compute per-model statistics from bench entries.
pub fn compute_model_stats(model: &str, entries: &[&BenchEntry]) -> Option<ModelStats> {
    if entries.len() < MIN_SESSIONS {
        return None;
    }

    let iterations: Vec<u64> = entries.iter().map(|e| e.iter as u64).collect();
    let durations: Vec<u64> = entries.iter().map(|e| e.secs).collect();
    let tools: Vec<u64> = entries.iter().map(|e| e.tools as u64).collect();
    let success_rates: Vec<f64> = entries.iter().map(|e| e.tool_success_rate as f64).collect();
    let tokens_in: Vec<u64> = entries.iter().map(|e| e.tokens_in).collect();
    let tokens_out: Vec<u64> = entries.iter().map(|e| e.tokens_out).collect();
    let ctx_pcts: Vec<u64> = entries.iter().map(|e| e.ctx_pct as u64).collect();
    let compactions: Vec<u64> = entries.iter().map(|e| e.compactions as u64).collect();
    let tool_lats: Vec<f64> = entries.iter().map(|e| e.tool_latency_avg_ms).collect();
    let api_lats: Vec<f64> = entries.iter().map(|e| e.api_latency_avg_ms).collect();
    let cache_pcts: Vec<u64> = entries.iter().map(|e| e.cache_pct as u64).collect();

    Some(ModelStats {
        model: model.to_string(),
        session_count: entries.len(),
        iterations: percentiles_u64(&iterations),
        duration_secs: percentiles_u64(&durations),
        tool_calls: percentiles_u64(&tools),
        tool_success_rate: percentiles_f64(&success_rates),
        tokens_in: percentiles_u64(&tokens_in),
        tokens_out: percentiles_u64(&tokens_out),
        ctx_pct: percentiles_u64(&ctx_pcts),
        compactions: percentiles_u64(&compactions),
        tool_latency_avg: percentiles_f64(&tool_lats),
        api_latency_avg: percentiles_f64(&api_lats),
        cache_pct: percentiles_u64(&cache_pcts),
        stream_retry_sessions: 0, // Not tracked in bench.jsonl currently
    })
}

/// Summarize integer samples as min / p50 / p95 / max / mean.
///
/// Ranks are read from a sorted copy of `data`, which is left untouched:
/// `p50` is the element at index `n / 2` (the upper median when `n` is even)
/// and `p95` is the element at index `floor(n * 0.95)`. An empty slice yields
/// an all-zero summary.
///
/// The mean is accumulated in `u128`, so samples whose total exceeds
/// `u64::MAX` neither panic (debug builds) nor wrap (release builds).
fn percentiles_u64(data: &[u64]) -> Percentiles {
    if data.is_empty() {
        return Percentiles {
            min: 0,
            p50: 0,
            p95: 0,
            max: 0,
            mean: 0.0,
        };
    }

    let mut sorted = data.to_vec();
    sorted.sort_unstable();
    let n = sorted.len();

    // Widen before summing: a slice of u64 values always fits in a u128 total.
    let total: u128 = sorted.iter().map(|&value| u128::from(value)).sum();
    let mean = total as f64 / n as f64;

    Percentiles {
        min: sorted[0],
        p50: sorted[n / 2],
        // n * 0.95 is strictly below n for n >= 1, so the truncated index is in bounds.
        p95: sorted[(n as f64 * 0.95) as usize],
        max: sorted[n - 1],
        mean,
    }
}

/// Summarize floating-point samples as min / p50 / p95 / max / mean.
///
/// Ranks are read from a sorted copy of `data`, which is left untouched:
/// `p50` is the element at index `n / 2` (the upper median when `n` is even)
/// and `p95` is the element at index `floor(n * 0.95)`. An empty slice yields
/// an all-zero summary.
///
/// Samples are ordered with [`f64::total_cmp`], which is a true total order:
/// a NaN sample is placed at one end of the sorted data (positive NaN last,
/// negative NaN first) instead of leaving the finite values out of order.
/// A NaN sample still makes `mean` NaN.
fn percentiles_f64(data: &[f64]) -> PercentilesF {
    if data.is_empty() {
        return PercentilesF {
            min: 0.0,
            p50: 0.0,
            p95: 0.0,
            max: 0.0,
            mean: 0.0,
        };
    }

    let mut sorted = data.to_vec();
    // `partial_cmp` with an `Equal` fallback is not transitive once a NaN is
    // present, which breaks the sort's ordering contract; `total_cmp` is.
    sorted.sort_by(f64::total_cmp);
    let n = sorted.len();
    let mean = sorted.iter().sum::<f64>() / n as f64;

    PercentilesF {
        min: sorted[0],
        p50: sorted[n / 2],
        // n * 0.95 is strictly below n for n >= 1, so the truncated index is in bounds.
        p95: sorted[(n as f64 * 0.95) as usize],
        max: sorted[n - 1],
        mean,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // Ten sorted samples: p50 is index 10 / 2 = 5, p95 is index floor(9.5) = 9.
    #[test]
    fn test_percentiles_u64() {
        let data = vec![1, 3, 5, 7, 9, 11, 13, 15, 17, 19];
        let p = percentiles_u64(&data);
        assert_eq!(p.min, 1);
        assert_eq!(p.max, 19);
        assert_eq!(p.p50, 11);
        assert_eq!(p.p95, 19);
        assert!((p.mean - 10.0).abs() < 0.01);
    }

    // Input order must not matter: ranks come from a sorted copy.
    #[test]
    fn test_percentiles_u64_unsorted() {
        let p = percentiles_u64(&[9, 1, 5]);
        assert_eq!(p.min, 1);
        assert_eq!(p.p50, 5);
        assert_eq!(p.p95, 9);
        assert_eq!(p.max, 9);
        assert!((p.mean - 5.0).abs() < 0.01);
    }

    // A single sample is every rank at once.
    #[test]
    fn test_percentiles_u64_single() {
        let p = percentiles_u64(&[42]);
        assert_eq!(p.min, 42);
        assert_eq!(p.p50, 42);
        assert_eq!(p.p95, 42);
        assert_eq!(p.max, 42);
        assert!((p.mean - 42.0).abs() < 0.01);
    }

    // No samples: every field of the summary is zero.
    #[test]
    fn test_percentiles_empty() {
        let p = percentiles_u64(&[]);
        assert_eq!(p.min, 0);
        assert_eq!(p.p50, 0);
        assert_eq!(p.p95, 0);
        assert_eq!(p.max, 0);
        assert!(p.mean.abs() < f64::EPSILON);
    }
}