use crate::config::Config;
use crate::optimizer::metrics::ModelStats;
/// A single configuration change proposed by the optimizer.
///
/// Values are stored pre-rendered as text so they can be displayed as-is.
#[derive(Clone, Debug)]
pub struct Suggestion {
    /// Identifier of the setting; the producers in this file use the config
    /// field name (e.g. `"tool_timeout_secs"`).
    pub key: String,
    /// Short display name of the setting (e.g. `"tool_timeout"`).
    pub label: String,
    /// Currently configured value, rendered as text.
    pub current: String,
    /// Proposed replacement value, rendered as text.
    pub suggested: String,
    /// Human-readable explanation of the evidence behind the proposal.
    pub reason: String,
    /// Score used by `analyze` to filter and rank suggestions; higher ranks first.
    pub confidence: f64,
}
/// Minimum confidence a suggestion needs in order to be returned by `analyze`;
/// anything scoring lower is dropped before the results are sorted.
const MIN_CONFIDENCE: f64 = 0.7;
pub fn analyze(stats: &ModelStats, config: &Config) -> Vec<Suggestion> {
let mut suggestions = Vec::new();
suggest_max_iterations(stats, config, &mut suggestions);
suggest_tool_timeout(stats, config, &mut suggestions);
suggest_compaction_threshold(stats, config, &mut suggestions);
suggest_stream_retries(stats, config, &mut suggestions);
suggest_iteration_delay(stats, config, &mut suggestions);
suggestions.retain(|s| s.confidence >= MIN_CONFIDENCE);
suggestions.sort_by(|a, b| {
b.confidence
.partial_cmp(&a.confidence)
.unwrap_or(std::cmp::Ordering::Equal)
});
suggestions
}
/// Suggests lowering `max_iterations` when sessions finish far below the limit.
///
/// Nothing is suggested when the observed P95 is zero, when the P95 is already
/// at least 60% of the configured limit, or when the computed target would not
/// be lower than the current limit.
///
/// The target is the larger of 1.5x the P95 and `max + 5`, rounded up to a
/// multiple of 5 and never below 10. Confidence is `1 - P95/limit`, capped at
/// 0.95.
fn suggest_max_iterations(stats: &ModelStats, config: &Config, out: &mut Vec<Suggestion>) {
    let current = config.max_iterations;
    let p95 = stats.iterations.p95 as u32;

    // No data to reason about.
    if p95 == 0 {
        return;
    }
    // The limit is already reasonably tight relative to observed usage.
    if p95 as f64 >= current as f64 * 0.6 {
        return;
    }

    let max_observed = stats.iterations.max as u32;
    let from_p95 = (p95 as f64 * 1.5).ceil() as u32;
    let from_max = max_observed + 5;
    let headroom = from_p95.max(from_max);

    // Only a strictly lower limit is worth suggesting.
    let suggested = round_up_to(headroom, 5).max(10);
    if suggested >= current {
        return;
    }

    let min_observed = stats.iterations.min as u32;
    let p50 = stats.iterations.p50 as u32;
    let reason = format!(
        "min={min_observed}, P50={p50}, P95={p95}, max={max_observed} across {} sessions. \
         Current limit {current} is {:.0}× the P95.",
        stats.session_count,
        current as f64 / p95 as f64,
    );
    let confidence = (1.0 - p95 as f64 / current as f64).min(0.95);

    out.push(Suggestion {
        key: "max_iterations".to_string(),
        label: "max_iterations".to_string(),
        current: current.to_string(),
        suggested: suggested.to_string(),
        reason,
        confidence,
    });
}
/// Suggests lowering `tool_timeout_secs` when tools respond far faster than
/// the configured timeout.
///
/// Latencies are read in milliseconds and rounded up to whole seconds. Nothing
/// is suggested when the P95 rounds to zero seconds, when it is at least 30% of
/// the current timeout, or when the computed target would not be lower than the
/// current timeout.
///
/// The target is twice the maximum observed latency, rounded up to a multiple
/// of 10 seconds and never below 30 seconds.
fn suggest_tool_timeout(stats: &ModelStats, config: &Config, out: &mut Vec<Suggestion>) {
    let current = config.tool_timeout_secs;
    let latency = &stats.tool_latency_avg;
    let (min_ms, p50_ms) = (latency.min, latency.p50);
    let (p95_ms, max_ms) = (latency.p95, latency.max);
    tracing::trace!(min_ms, p50_ms, "Tool latency distribution");

    let p95_secs = (p95_ms / 1000.0).ceil() as u64;
    // No data to reason about.
    if p95_secs == 0 {
        return;
    }
    // The timeout is not generous enough to be worth trimming.
    if p95_secs as f64 >= current as f64 * 0.3 {
        return;
    }

    // Only a strictly lower timeout is worth suggesting.
    let max_secs = (max_ms / 1000.0).ceil() as u64;
    let suggested = round_up_to_u64(max_secs * 2, 10).max(30);
    if suggested >= current {
        return;
    }

    let unused_share = 1.0 - p95_secs as f64 / current as f64;
    let confidence = (unused_share * 0.9).min(0.9);
    let reason = format!(
        "Tool latency P95={p95_ms:.0}ms, max={max_ms:.0}ms. \
         Current {current}s timeout is {:.0}× the max observed.",
        current as f64 / max_secs.max(1) as f64,
    );

    out.push(Suggestion {
        key: "tool_timeout_secs".to_string(),
        label: "tool_timeout".to_string(),
        current: format!("{current}s"),
        suggested: format!("{suggested}s"),
        reason,
        confidence,
    });
}
/// Suggests nudging `compaction_threshold` by 0.05 when compaction frequency
/// and context usage disagree.
///
/// Two mutually exclusive rules are evaluated:
///
/// * **Raise** (capped at 0.90, confidence 0.75) when sessions average more
///   than one compaction while median context usage is below 70%.
/// * **Lower** (floored at 0.50, confidence 0.8) when P95 context usage reaches
///   90% while sessions average fewer than 0.5 compactions.
///
/// A rule only emits a suggestion when it moves the threshold by at least 0.01
/// *in the direction its reason text describes*. If the configured value
/// already lies beyond the cap or floor, clamping would move it the opposite
/// way, so nothing is suggested.
fn suggest_compaction_threshold(stats: &ModelStats, config: &Config, out: &mut Vec<Suggestion>) {
    let current = config.compaction_threshold;

    if stats.compactions.mean > 1.0 && stats.ctx_pct.p50 < 70 {
        let suggested = (current + 0.05).min(0.90);
        // Signed check: when `current` is above the cap the clamp yields a
        // lower value, which must not be presented as a raise.
        if suggested - current < 0.01 {
            return;
        }
        out.push(Suggestion {
            key: "compaction_threshold".to_string(),
            label: "compaction_threshold".to_string(),
            current: format!("{current:.2}"),
            suggested: format!("{suggested:.2}"),
            reason: format!(
                "Average {:.1} compactions/task but median context at {}%. \
                 Threshold may be too aggressive — raising preserves more context.",
                stats.compactions.mean, stats.ctx_pct.p50,
            ),
            confidence: 0.75,
        });
        return;
    }

    if stats.ctx_pct.p95 >= 90 && stats.compactions.mean < 0.5 {
        let suggested = (current - 0.05).max(0.50);
        // Signed check: when `current` is below the floor the clamp yields a
        // higher value, which must not be presented as a reduction.
        if current - suggested < 0.01 {
            return;
        }
        out.push(Suggestion {
            key: "compaction_threshold".to_string(),
            label: "compaction_threshold".to_string(),
            current: format!("{current:.2}"),
            suggested: format!("{suggested:.2}"),
            reason: format!(
                "Context hits 90%+ at P95 but only {:.1} compactions/task. \
                 Lowering threshold enables earlier compaction to prevent context overflow.",
                stats.compactions.mean,
            ),
            confidence: 0.8,
        });
    }
}
/// Suggests reducing `stream_max_retries` to 3 when retries were never needed.
///
/// A suggestion is emitted (confidence 0.7) only when the configured value is
/// above 3, no session recorded a stream retry, and at least 10 sessions were
/// observed. A configured value of 3 or less is left alone: suggesting 3 would
/// either be a no-op or an increase, contradicting the reason text.
fn suggest_stream_retries(stats: &ModelStats, config: &Config, out: &mut Vec<Suggestion>) {
    let current = config.stream_max_retries;

    // Already at or below the value we would suggest.
    if current <= 3 {
        return;
    }

    if stats.stream_retry_sessions == 0 && stats.session_count >= 10 {
        out.push(Suggestion {
            key: "stream_max_retries".to_string(),
            label: "stream_max_retries".to_string(),
            current: current.to_string(),
            suggested: "3".to_string(),
            reason: format!(
                "Zero stream retries across {} sessions. \
                 Reducing from {current} to 3 lowers worst-case wait without losing resilience.",
                stats.session_count,
            ),
            confidence: 0.7,
        });
    }
}
/// Suggests lowering `iteration_delay_ms` to 20ms when API calls are slow
/// enough that the inter-iteration delay adds little.
///
/// A suggestion is emitted (confidence 0.75) only when the mean API latency
/// exceeds 3000ms and the configured delay is above 50ms.
fn suggest_iteration_delay(stats: &ModelStats, config: &Config, out: &mut Vec<Suggestion>) {
    let current = config.iteration_delay_ms;
    let avg_api_ms = stats.api_latency_avg.mean;

    let api_is_slow = avg_api_ms > 3000.0;
    let delay_is_large = current > 50;
    if !(api_is_slow && delay_is_large) {
        return;
    }

    let reason = format!(
        "Average API latency is {avg_api_ms:.0}ms — the {current}ms courtesy delay \
         adds negligible rate-limit protection. Lowering to 20ms improves responsiveness.",
    );
    out.push(Suggestion {
        key: "iteration_delay_ms".to_string(),
        label: "iteration_delay".to_string(),
        current: format!("{current}ms"),
        suggested: "20ms".to_string(),
        reason,
        confidence: 0.75,
    });
}
/// Rounds `val` up to the nearest multiple of `step`; exact multiples are
/// returned unchanged.
///
/// # Panics
///
/// Panics if `step` is zero.
fn round_up_to(val: u32, step: u32) -> u32 {
    let remainder = val % step;
    if remainder == 0 {
        val
    } else {
        val + (step - remainder)
    }
}
/// Rounds `val` up to the nearest multiple of `step`; exact multiples are
/// returned unchanged. 64-bit counterpart of `round_up_to`.
///
/// # Panics
///
/// Panics if `step` is zero.
fn round_up_to_u64(val: u64, step: u64) -> u64 {
    let remainder = val % step;
    if remainder == 0 {
        val
    } else {
        val + (step - remainder)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::optimizer::metrics::{Percentiles, PercentilesF};

    /// Test configuration with every tunable read by the heuristics pinned to
    /// a known value.
    fn dummy_config() -> Config {
        let mut cfg = Config::default_for_test();
        cfg.max_iterations = 50;
        cfg.tool_timeout_secs = 120;
        cfg.compaction_threshold = 0.80;
        cfg.stream_max_retries = 5;
        cfg.iteration_delay_ms = 50;
        cfg
    }

    /// Baseline statistics for ten sessions whose iteration counts (P95 of 15,
    /// max of 20) sit well below the `max_iterations` of 50 in `dummy_config`.
    fn dummy_stats() -> ModelStats {
        let iterations = Percentiles {
            min: 3,
            p50: 8,
            p95: 15,
            max: 20,
            mean: 9.0,
        };
        let duration_secs = Percentiles {
            min: 10,
            p50: 60,
            p95: 180,
            max: 300,
            mean: 80.0,
        };
        let tool_calls = Percentiles {
            min: 2,
            p50: 10,
            p95: 25,
            max: 35,
            mean: 12.0,
        };
        let tool_success_rate = PercentilesF {
            min: 90.0,
            p50: 98.0,
            p95: 100.0,
            max: 100.0,
            mean: 97.0,
        };
        let tokens_in = Percentiles {
            min: 5000,
            p50: 30000,
            p95: 80000,
            max: 100000,
            mean: 35000.0,
        };
        let tokens_out = Percentiles {
            min: 500,
            p50: 3000,
            p95: 8000,
            max: 12000,
            mean: 3500.0,
        };
        let ctx_pct = Percentiles {
            min: 10,
            p50: 40,
            p95: 70,
            max: 85,
            mean: 42.0,
        };
        let compactions = Percentiles {
            min: 0,
            p50: 0,
            p95: 1,
            max: 2,
            mean: 0.3,
        };
        let tool_latency_avg = PercentilesF {
            min: 50.0,
            p50: 200.0,
            p95: 800.0,
            max: 2000.0,
            mean: 300.0,
        };
        let api_latency_avg = PercentilesF {
            min: 500.0,
            p50: 2000.0,
            p95: 5000.0,
            max: 8000.0,
            mean: 2500.0,
        };
        let cache_pct = Percentiles {
            min: 0,
            p50: 30,
            p95: 60,
            max: 80,
            mean: 32.0,
        };

        ModelStats {
            model: "test-model".to_string(),
            session_count: 10,
            iterations,
            duration_secs,
            tool_calls,
            tool_success_rate,
            tokens_in,
            tokens_out,
            ctx_pct,
            compactions,
            tool_latency_avg,
            api_latency_avg,
            cache_pct,
            stream_retry_sessions: 0,
        }
    }

    /// With a P95 of 15 against a limit of 50, `analyze` must propose a lower
    /// limit that still covers the observed maximum plus headroom.
    #[test]
    fn test_suggest_max_iterations() {
        let suggestions = analyze(&dummy_stats(), &dummy_config());

        let Some(s) = suggestions.iter().find(|s| s.key == "max_iterations") else {
            panic!("Should suggest lowering max_iterations");
        };

        let val: u32 = s.suggested.parse().unwrap();
        assert!(val < 50, "Suggested should be lower than 50");
        assert!(
            val >= 25,
            "Suggested should be at least max_observed + headroom"
        );
    }

    /// With a P95 of 45 against a limit of 50, no `max_iterations` suggestion
    /// may be produced.
    #[test]
    fn test_no_suggestion_when_close() {
        let mut stats = dummy_stats();
        stats.iterations = Percentiles {
            min: 10,
            p50: 30,
            p95: 45,
            max: 48,
            mean: 35.0,
        };

        let suggestions = analyze(&stats, &dummy_config());

        let has_iteration_suggestion = suggestions.iter().any(|s| s.key == "max_iterations");
        assert!(
            !has_iteration_suggestion,
            "Should not suggest when P95 is close to limit"
        );
    }
}