// collet 0.1.1 - Docs.rs

//! Parameter optimization analyzer.
//!
//! Analyzes per-model statistics from bench.jsonl and generates concrete
//! parameter change suggestions with confidence scores and evidence.

use crate::config::Config;
use crate::optimizer::metrics::ModelStats;

/// A single parameter optimization suggestion.
///
/// Instances are built by the `suggest_*` helpers in this module and returned
/// from [`analyze`]. `current` and `suggested` are pre-formatted display
/// strings; some helpers append a unit suffix such as `s` or `ms`.
#[derive(Debug, Clone)]
pub struct Suggestion {
    /// Config field key, e.g. `tool_timeout_secs` (matches ConfigItem keys).
    pub key: String,
    /// Human-readable parameter label.
    pub label: String,
    /// Current value (display string).
    pub current: String,
    /// Suggested new value (display string).
    pub suggested: String,
    /// Evidence-based reason for the suggestion, quoting the statistics that
    /// triggered it.
    pub reason: String,
    /// Confidence score 0.0–1.0. [`analyze`] drops suggestions scoring below
    /// 0.7 and sorts the rest by this value, highest first.
    pub confidence: f64,
}

/// Minimum confidence to include a suggestion in the popup.
///
/// `analyze` discards every suggestion whose `confidence` is below this value
/// before sorting the remainder.
const MIN_CONFIDENCE: f64 = 0.7;

/// Run every parameter heuristic for one model and rank the results.
///
/// Each `suggest_*` helper inspects `stats` against the values in `config` and
/// may append a [`Suggestion`]. Suggestions below `MIN_CONFIDENCE` are dropped
/// and the rest are returned ordered by confidence, highest first.
pub fn analyze(stats: &ModelStats, config: &Config) -> Vec<Suggestion> {
    let mut ranked = Vec::new();

    // Heuristics run in a fixed order; the stable sort below preserves that
    // order among suggestions with equal confidence.
    suggest_max_iterations(stats, config, &mut ranked);
    suggest_tool_timeout(stats, config, &mut ranked);
    suggest_compaction_threshold(stats, config, &mut ranked);
    suggest_stream_retries(stats, config, &mut ranked);
    suggest_iteration_delay(stats, config, &mut ranked);

    // Keep only suggestions that clear the confidence floor. NaN fails the
    // `>=` comparison, so nothing unordered survives this step.
    ranked.retain(|candidate| candidate.confidence >= MIN_CONFIDENCE);

    // Highest confidence first. With NaN already removed, the total order
    // agrees with the partial order on every remaining value.
    ranked.sort_by(|lhs, rhs| rhs.confidence.total_cmp(&lhs.confidence));

    ranked
}

/// max_iterations: propose a lower iteration cap when usage sits far below it.
///
/// Fires only when the P95 iteration count is non-zero and under 60% of the
/// configured limit. The proposed cap is the larger of `ceil(P95 * 1.5)` and
/// `max + 5`, rounded up to a multiple of 5, never below 10 and never above
/// the current limit. Confidence is `1 - P95 / current`, capped at 0.95.
fn suggest_max_iterations(stats: &ModelStats, config: &Config, out: &mut Vec<Suggestion>) {
    let current = config.max_iterations;
    let observed = &stats.iterations;
    let p95 = observed.p95 as u32;

    let limit = f64::from(current);
    let p95_f = f64::from(p95);

    // No data, or P95 already at 60% or more of the limit: leave it alone.
    if p95 == 0 || p95_f >= limit * 0.6 {
        return;
    }

    // Candidate cap: 1.5x the P95, but always at least 5 above the observed max.
    let max_observed = observed.max as u32;
    let scaled_p95 = (p95_f * 1.5).ceil() as u32;
    let headroom = scaled_p95.max(max_observed + 5);

    // Snap to a multiple of 5, keep a floor of 10, and never exceed the limit.
    let suggested = current.min(round_up_to(headroom, 5).max(10));
    if suggested >= current {
        return;
    }

    // The further P95 sits below the limit, the more confident the suggestion.
    let confidence = (1.0 - p95_f / limit).min(0.95);

    let min_observed = observed.min as u32;
    let p50 = observed.p50 as u32;
    let reason = format!(
        "min={min_observed}, P50={p50}, P95={p95}, max={max_observed} across {} sessions. \
         Current limit {current} is {:.0}× the P95.",
        stats.session_count,
        limit / p95_f,
    );

    out.push(Suggestion {
        key: "max_iterations".to_string(),
        label: "max_iterations".to_string(),
        current: current.to_string(),
        suggested: suggested.to_string(),
        reason,
        confidence,
    });
}

/// tool_timeout_secs: propose a shorter tool timeout when latency is far below it.
///
/// Latency percentiles are handled as milliseconds and rounded up to whole
/// seconds. Fires only when the P95 is non-zero and under 30% of the current
/// timeout. The proposed timeout is twice the observed maximum, rounded up to
/// a multiple of 10 seconds, never below 30 seconds and never above the
/// current value. Confidence is `(1 - P95 / current) * 0.9`.
fn suggest_tool_timeout(stats: &ModelStats, config: &Config, out: &mut Vec<Suggestion>) {
    let current = config.tool_timeout_secs;
    let latency = &stats.tool_latency_avg;
    let (min_ms, p50_ms, p95_ms, max_ms) = (latency.min, latency.p50, latency.p95, latency.max);
    tracing::trace!(min_ms, p50_ms, "Tool latency distribution");

    let timeout = current as f64;
    let p95_secs = (p95_ms / 1000.0).ceil() as u64;

    // No data, or P95 already at 30% or more of the timeout: leave it alone.
    if p95_secs == 0 || p95_secs as f64 >= timeout * 0.3 {
        return;
    }

    // Twice the slowest observed call, snapped to 10s steps, floor of 30s,
    // and never above what is configured now.
    let max_secs = (max_ms / 1000.0).ceil() as u64;
    let suggested = current.min(round_up_to_u64(max_secs * 2, 10).max(30));
    if suggested >= current {
        return;
    }

    let confidence = ((1.0 - p95_secs as f64 / timeout) * 0.9).min(0.9);

    // `max(1)` guards the ratio shown to the user against a zero divisor.
    let reason = format!(
        "Tool latency P95={p95_ms:.0}ms, max={max_ms:.0}ms. \
         Current {current}s timeout is {:.0}× the max observed.",
        timeout / max_secs.max(1) as f64,
    );

    out.push(Suggestion {
        key: "tool_timeout_secs".to_string(),
        label: "tool_timeout".to_string(),
        current: format!("{current}s"),
        suggested: format!("{suggested}s"),
        reason,
        confidence,
    });
}

/// compaction_threshold: suggest raising if compactions are frequent while
/// median context usage stays low, or lowering if context reaches 90%+ at P95
/// with few compactions.
///
/// Each case moves the threshold by 0.05, capped at 0.90 when raising and
/// floored at 0.50 when lowering. A suggestion is emitted only when the
/// clamped value moves by at least 0.01 in the direction the reason text
/// describes; a threshold already beyond the cap or floor is left alone
/// rather than being pulled back the opposite way.
fn suggest_compaction_threshold(stats: &ModelStats, config: &Config, out: &mut Vec<Suggestion>) {
    let current = config.compaction_threshold;

    // Case 1: Frequent compactions (mean > 1 per task) but context stays under 70%
    // → threshold is too aggressive, raise it
    if stats.compactions.mean > 1.0 && stats.ctx_pct.p50 < 70 {
        let suggested = (current + 0.05).min(0.90);
        // Require a real increase. If `current` is already above the 0.90 cap,
        // the clamp yields a lower value, which would contradict the
        // "raising" advice in the reason below.
        if suggested - current < 0.01 {
            return;
        }
        out.push(Suggestion {
            key: "compaction_threshold".to_string(),
            label: "compaction_threshold".to_string(),
            current: format!("{current:.2}"),
            suggested: format!("{suggested:.2}"),
            reason: format!(
                "Average {:.1} compactions/task but median context at {}%. \
                 Threshold may be too aggressive — raising preserves more context.",
                stats.compactions.mean, stats.ctx_pct.p50,
            ),
            confidence: 0.75,
        });
        return;
    }

    // Case 2: Context often at 90%+ but few compactions → threshold may be too high
    if stats.ctx_pct.p95 >= 90 && stats.compactions.mean < 0.5 {
        let suggested = (current - 0.05).max(0.50);
        // Require a real decrease. If `current` is already below the 0.50
        // floor, the clamp yields a higher value, which would contradict the
        // "lowering" advice in the reason below.
        if current - suggested < 0.01 {
            return;
        }
        out.push(Suggestion {
            key: "compaction_threshold".to_string(),
            label: "compaction_threshold".to_string(),
            current: format!("{current:.2}"),
            suggested: format!("{suggested:.2}"),
            reason: format!(
                "Context hits 90%+ at P95 but only {:.1} compactions/task. \
                 Lowering threshold enables earlier compaction to prevent context overflow.",
                stats.compactions.mean,
            ),
            confidence: 0.8,
        });
    }
}

/// stream_max_retries: if retries are never needed, suggest lowering to 3.
///
/// Fires only when the configured value is above 3, no session recorded a
/// stream retry, and at least 10 sessions were observed. The suggestion uses
/// a fixed confidence of 0.7.
fn suggest_stream_retries(stats: &ModelStats, config: &Config, out: &mut Vec<Suggestion>) {
    let current = config.stream_max_retries;
    // The suggested value is always 3, so a config at or below 3 has nothing
    // to reduce. This bound must match the value in `suggested` and in the
    // reason text; a looser bound would emit "Reducing from 3 to 3".
    if current <= 3 {
        return; // already at or below the suggested value
    }

    // If no retries observed across many sessions, suggest reducing
    if stats.stream_retry_sessions == 0 && stats.session_count >= 10 {
        out.push(Suggestion {
            key: "stream_max_retries".to_string(),
            label: "stream_max_retries".to_string(),
            current: current.to_string(),
            suggested: "3".to_string(),
            reason: format!(
                "Zero stream retries across {} sessions. \
                 Reducing from {current} to 3 lowers worst-case wait without losing resilience.",
                stats.session_count,
            ),
            confidence: 0.7,
        });
    }
}

/// iteration_delay_ms: propose a 20ms delay when API calls are already slow.
///
/// Fires only when the mean API latency exceeds 3000ms and the configured
/// delay is above 50ms. The suggestion uses a fixed confidence of 0.75. This
/// helper never proposes a longer delay.
fn suggest_iteration_delay(stats: &ModelStats, config: &Config, out: &mut Vec<Suggestion>) {
    let current = config.iteration_delay_ms;
    let avg_api_ms = stats.api_latency_avg.mean;

    // Both conditions must hold: slow API responses and a delay above 50ms.
    let api_is_slow = avg_api_ms > 3000.0;
    let delay_is_long = current > 50;
    if !(api_is_slow && delay_is_long) {
        return;
    }

    let reason = format!(
        "Average API latency is {avg_api_ms:.0}ms — the {current}ms courtesy delay \
         adds negligible rate-limit protection. Lowering to 20ms improves responsiveness.",
    );

    out.push(Suggestion {
        key: "iteration_delay_ms".to_string(),
        label: "iteration_delay".to_string(),
        current: format!("{current}ms"),
        suggested: "20ms".to_string(),
        reason,
        confidence: 0.75,
    });
}

/// Round `val` up to the nearest multiple of `step`.
///
/// A `val` that is already a multiple of `step` is returned unchanged.
///
/// # Panics
///
/// Panics if `step` is zero.
fn round_up_to(val: u32, step: u32) -> u32 {
    val.next_multiple_of(step)
}

/// Round `val` up to the nearest multiple of `step` (64-bit variant).
///
/// A `val` that is already a multiple of `step` is returned unchanged.
///
/// # Panics
///
/// Panics if `step` is zero.
fn round_up_to_u64(val: u64, step: u64) -> u64 {
    val.next_multiple_of(step)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::optimizer::metrics::{Percentiles, PercentilesF};

    /// Baseline config for the tests: iteration limit 50, tool timeout 120s,
    /// compaction threshold 0.80, 5 stream retries, 50ms iteration delay.
    fn dummy_config() -> Config {
        let mut config = Config::default_for_test();
        config.max_iterations = 50;
        config.tool_timeout_secs = 120;
        config.compaction_threshold = 0.80;
        config.stream_max_retries = 5;
        config.iteration_delay_ms = 50;
        config
    }

    /// Baseline statistics for the tests: 10 sessions with no stream retries.
    /// Tests override individual fields to steer a specific heuristic.
    fn dummy_stats() -> ModelStats {
        ModelStats {
            model: "test-model".to_string(),
            session_count: 10,
            // P95 of 15 is below 60% of the 50-iteration limit in `dummy_config`.
            iterations: Percentiles {
                min: 3,
                p50: 8,
                p95: 15,
                max: 20,
                mean: 9.0,
            },
            duration_secs: Percentiles {
                min: 10,
                p50: 60,
                p95: 180,
                max: 300,
                mean: 80.0,
            },
            tool_calls: Percentiles {
                min: 2,
                p50: 10,
                p95: 25,
                max: 35,
                mean: 12.0,
            },
            tool_success_rate: PercentilesF {
                min: 90.0,
                p50: 98.0,
                p95: 100.0,
                max: 100.0,
                mean: 97.0,
            },
            tokens_in: Percentiles {
                min: 5000,
                p50: 30000,
                p95: 80000,
                max: 100000,
                mean: 35000.0,
            },
            tokens_out: Percentiles {
                min: 500,
                p50: 3000,
                p95: 8000,
                max: 12000,
                mean: 3500.0,
            },
            ctx_pct: Percentiles {
                min: 10,
                p50: 40,
                p95: 70,
                max: 85,
                mean: 42.0,
            },
            compactions: Percentiles {
                min: 0,
                p50: 0,
                p95: 1,
                max: 2,
                mean: 0.3,
            },
            // Read by `suggest_tool_timeout`, which treats these as milliseconds.
            tool_latency_avg: PercentilesF {
                min: 50.0,
                p50: 200.0,
                p95: 800.0,
                max: 2000.0,
                mean: 300.0,
            },
            // Read by `suggest_iteration_delay`, which compares the mean to 3000.
            api_latency_avg: PercentilesF {
                min: 500.0,
                p50: 2000.0,
                p95: 5000.0,
                max: 8000.0,
                mean: 2500.0,
            },
            cache_pct: Percentiles {
                min: 0,
                p50: 30,
                p95: 60,
                max: 80,
                mean: 32.0,
            },
            stream_retry_sessions: 0,
        }
    }

    /// With P95 well below the limit, `analyze` should propose a lower
    /// `max_iterations` that still clears the observed max plus headroom.
    #[test]
    fn test_suggest_max_iterations() {
        let config = dummy_config();
        let stats = dummy_stats(); // p95=15, max=20, current=50
        let suggestions = analyze(&stats, &config);
        let iter_sug = suggestions.iter().find(|s| s.key == "max_iterations");
        assert!(iter_sug.is_some(), "Should suggest lowering max_iterations");
        let s = iter_sug.unwrap();
        let val: u32 = s.suggested.parse().unwrap();
        assert!(val < 50, "Suggested should be lower than 50");
        // max observed (20) + 5 headroom = 25.
        assert!(
            val >= 25,
            "Suggested should be at least max_observed + headroom"
        );
    }

    /// With P95 at 45 of a 50-iteration limit (above the 60% cut-off), no
    /// `max_iterations` suggestion should be produced.
    #[test]
    fn test_no_suggestion_when_close() {
        let mut stats = dummy_stats();
        stats.iterations = Percentiles {
            min: 10,
            p50: 30,
            p95: 45,
            max: 48,
            mean: 35.0,
        };
        let config = dummy_config();
        let suggestions = analyze(&stats, &config);
        let iter_sug = suggestions.iter().find(|s| s.key == "max_iterations");
        assert!(
            iter_sug.is_none(),
            "Should not suggest when P95 is close to limit"
        );
    }
}