/// Settings for an HTTP benchmark run.
///
/// `Default` supplies the baseline values; `relaxed()` and `reproducible()`
/// are presets that override a subset of them.
#[derive(Debug, Clone)]
pub struct HttpBenchmarkConfig {
/// Stopping criterion; its `min_samples`, `max_samples` and `cv_threshold`
/// fields are re-exposed by the accessor methods on this type.
pub cv_criterion: CvStoppingCriterion,
/// Warm-up iteration count (default 2).
/// NOTE(review): presumably executed before samples are recorded — confirm in the runner.
pub warmup_iterations: usize,
/// Prompt text; defaults to `canonical_inputs::LATENCY_PROMPT`.
pub prompt: String,
/// Defaults to `canonical_inputs::MAX_TOKENS`.
/// NOTE(review): presumably the per-request generation cap — confirm against the client.
pub max_tokens: usize,
/// Defaults to 0.0.
/// NOTE(review): presumably the sampling temperature sent with each request — confirm.
pub temperature: f32,
/// Preflight toggle; `true` by default, `false` in `relaxed()`.
pub run_preflight: bool,
/// Outlier-filtering toggle; `true` by default, `false` in `relaxed()`.
pub filter_outliers: bool,
/// Defaults to 3.0; `reproducible()` uses 2.5.
/// NOTE(review): presumably the threshold multiplier handed to the outlier detector — confirm.
pub outlier_k_factor: f64,
}
impl Default for HttpBenchmarkConfig {
fn default() -> Self {
Self {
cv_criterion: CvStoppingCriterion::default(), warmup_iterations: 2,
prompt: canonical_inputs::LATENCY_PROMPT.to_string(),
max_tokens: canonical_inputs::MAX_TOKENS,
temperature: 0.0, run_preflight: true,
filter_outliers: true,
outlier_k_factor: 3.0,
}
}
}
impl HttpBenchmarkConfig {
    /// Preset that trades rigor for speed.
    ///
    /// Overrides the defaults with `CvStoppingCriterion::new(3, 10, 0.20)`,
    /// a single warm-up iteration, and disables both preflight and outlier
    /// filtering. Every other field keeps its `Default` value.
    ///
    /// NOTE(review): the three criterion arguments are presumably
    /// (min samples, max samples, CV threshold) — confirm against
    /// `CvStoppingCriterion::new`.
    #[must_use]
    pub fn relaxed() -> Self {
        let criterion = CvStoppingCriterion::new(3, 10, 0.20);

        Self {
            cv_criterion: criterion,
            warmup_iterations: 1,
            run_preflight: false,
            filter_outliers: false,
            ..Self::default()
        }
    }

    /// Preset with stricter settings than the default.
    ///
    /// Overrides the defaults with `CvStoppingCriterion::new(10, 50, 0.03)`,
    /// three warm-up iterations and an outlier k-factor of `2.5`; preflight
    /// and outlier filtering are explicitly enabled. Every other field keeps
    /// its `Default` value.
    #[must_use]
    pub fn reproducible() -> Self {
        let criterion = CvStoppingCriterion::new(10, 50, 0.03);

        Self {
            cv_criterion: criterion,
            warmup_iterations: 3,
            // Same as the defaults today, but spelled out so this preset
            // does not silently change if the defaults do.
            run_preflight: true,
            filter_outliers: true,
            outlier_k_factor: 2.5,
            ..Self::default()
        }
    }

    /// Returns `min_samples` from the configured stopping criterion.
    #[must_use]
    pub fn min_samples(&self) -> usize {
        let criterion = &self.cv_criterion;
        criterion.min_samples
    }

    /// Returns `max_samples` from the configured stopping criterion.
    #[must_use]
    pub fn max_samples(&self) -> usize {
        let criterion = &self.cv_criterion;
        criterion.max_samples
    }

    /// Returns `cv_threshold` from the configured stopping criterion.
    #[must_use]
    pub fn cv_threshold(&self) -> f64 {
        let criterion = &self.cv_criterion;
        criterion.cv_threshold
    }
}
/// Outcome of an HTTP benchmark run.
///
/// NOTE(review): the code that fills these fields is not part of this
/// section. The per-field notes below are read off the field names and
/// types only; confirm units and exact definitions against the runner.
#[derive(Debug, Clone)]
pub struct HttpBenchmarkResult {
/// Latency samples (presumably the raw, unfiltered measurements).
pub latency_samples: Vec<f64>,
/// Latency samples after filtering (presumably outlier removal).
pub latency_samples_filtered: Vec<f64>,
/// Mean latency; the `_ms` suffix suggests milliseconds.
pub mean_latency_ms: f64,
/// 50th-percentile latency; the `_ms` suffix suggests milliseconds.
pub p50_latency_ms: f64,
/// 99th-percentile latency; the `_ms` suffix suggests milliseconds.
pub p99_latency_ms: f64,
/// Standard deviation of latency; the `_ms` suffix suggests milliseconds.
pub std_dev_ms: f64,
/// Coefficient of variation at the point sampling stopped (per the name).
pub cv_at_stop: f64,
/// Throughput; `tps` presumably means tokens per second — TODO confirm.
pub throughput_tps: f64,
/// Cold-start latency; the `_ms` suffix suggests milliseconds.
pub cold_start_ms: f64,
/// Sample count (presumably the length of `latency_samples`).
pub sample_count: usize,
/// Filtered sample count (presumably the length of `latency_samples_filtered`).
pub filtered_sample_count: usize,
/// Whether the CV criterion converged (presumably as opposed to hitting the sample cap).
pub cv_converged: bool,
/// Additional quality metrics; see `QualityMetrics` for its contents.
pub quality_metrics: QualityMetrics,
}
/// Bundles an HTTP client, a benchmark configuration, an optional preflight
/// runner and an outlier detector.
///
/// NOTE(review): the constructor and run logic are not part of this section;
/// how these fields interact should be confirmed there.
pub struct HttpBenchmarkRunner {
// HTTP client used to reach the model endpoint (per the type name).
client: ModelHttpClient,
// Benchmark settings for this runner.
config: HttpBenchmarkConfig,
// Optional preflight runner; presumably `None` when `config.run_preflight`
// is false — TODO confirm in the constructor.
preflight_runner: Option<PreflightRunner>,
// Outlier detector; presumably configured from `config.outlier_k_factor`
// — TODO confirm in the constructor.
outlier_detector: OutlierDetector,
}