use crate::atp::benchmark::BenchmarkEnvironment;
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::time::Duration;
/// Measurements recorded for a single benchmark iteration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkMetrics {
/// Elapsed wall-clock time; the denominator of `throughput_bps`.
pub wall_time: Duration,
/// CPU time, when measured; feeds `cpu_efficiency` and `cpu_ms_per_gib`.
pub cpu_time: Option<Duration>,
/// Peak memory, when measured (reported through the `mean_memory_peak_bytes` metric).
pub memory_peak: Option<u64>,
/// Number of bytes transferred; the numerator of the throughput and ratio helpers.
pub bytes_transferred: u64,
/// Bytes on the wire, when measured; the denominator of `compression_ratio`.
pub bytes_on_wire: Option<u64>,
/// `true` marks the iteration as successful; aggregate statistics only
/// include iterations where this flag is set.
pub verified_completion: bool,
/// Time until the first usable output, when recorded.
/// NOTE(review): the exact start/end points of this measurement are not visible here.
pub first_usable_output: Option<Duration>,
/// Time taken to resume, when recorded.
/// NOTE(review): the exact start/end points of this measurement are not visible here.
pub resume_time: Option<Duration>,
/// Disk amplification ratio, when recorded; averaged into
/// `AggregateStats::mean_disk_amplification_ratio`.
pub disk_amplification_ratio: Option<f64>,
/// For a failed iteration, whether the failure was reproducible; `Some(true)`
/// counts toward `AggregateStats::failure_reproducibility_rate`.
pub failure_reproducible: Option<bool>,
/// Free-form description of the failure, if any.
pub failure_mode: Option<String>,
}
impl BenchmarkMetrics {
    /// Bytes transferred per second of wall-clock time.
    ///
    /// Returns `None` when the iteration is not a verified completion or when
    /// `wall_time` is zero, because the rate would be undefined.
    #[must_use]
    pub fn throughput_bps(&self) -> Option<f64> {
        let seconds = self.wall_time.as_secs_f64();
        (self.verified_completion && seconds > 0.0)
            .then(|| self.bytes_transferred as f64 / seconds)
    }

    /// Ratio of `bytes_transferred` to `bytes_on_wire`.
    ///
    /// Returns `None` when `bytes_on_wire` was not recorded or is zero; a zero
    /// denominator would otherwise yield `inf`/`NaN`, which the sibling
    /// helpers also refuse to report.
    #[must_use]
    pub fn compression_ratio(&self) -> Option<f64> {
        self.bytes_on_wire
            .filter(|&on_wire| on_wire > 0)
            .map(|on_wire| self.bytes_transferred as f64 / on_wire as f64)
    }

    /// Bytes transferred per second of CPU time.
    ///
    /// Returns `None` when `cpu_time` was not recorded or is zero.
    #[must_use]
    pub fn cpu_efficiency(&self) -> Option<f64> {
        self.cpu_time
            .map(|cpu_time| cpu_time.as_secs_f64())
            .filter(|&seconds| seconds > 0.0)
            .map(|seconds| self.bytes_transferred as f64 / seconds)
    }

    /// CPU milliseconds spent per GiB (2^30 bytes) transferred.
    ///
    /// Returns `None` when `cpu_time` was not recorded or nothing was
    /// transferred.
    #[must_use]
    pub fn cpu_ms_per_gib(&self) -> Option<f64> {
        const BYTES_PER_GIB: f64 = 1_073_741_824.0;

        let cpu_time = self.cpu_time?;
        if self.bytes_transferred == 0 {
            return None;
        }
        let gib = self.bytes_transferred as f64 / BYTES_PER_GIB;
        Some(cpu_time.as_secs_f64() * 1000.0 / gib)
    }
}
/// All iterations recorded for one tool or profile, plus the environment
/// they ran in.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkResult {
/// Name of the benchmarked tool.
pub tool_name: String,
/// Per-iteration measurements, successful and failed alike.
pub iterations: Vec<BenchmarkMetrics>,
/// Environment description captured for this run.
pub environment: BenchmarkEnvironment,
}
impl BenchmarkResult {
    /// Summarises every iteration into an [`AggregateStats`].
    ///
    /// Only iterations with `verified_completion == true` contribute to the
    /// timing, throughput and resource statistics. Each optional metric is
    /// averaged over the iterations that actually reported it. When no
    /// iteration succeeded (including the empty case) this returns
    /// [`AggregateStats::failed`].
    ///
    /// `failure_reproducibility_rate` is the share of failed iterations whose
    /// `failure_reproducible` is `Some(true)`, or `1.0` when nothing failed.
    #[must_use]
    pub fn aggregate_stats(&self) -> AggregateStats {
        let successful: Vec<&BenchmarkMetrics> = self
            .iterations
            .iter()
            .filter(|m| m.verified_completion)
            .collect();
        if successful.is_empty() {
            return AggregateStats::failed();
        }

        let wall_times: Vec<Duration> = successful.iter().map(|m| m.wall_time).collect();
        let first_usable_outputs: Vec<Duration> = successful
            .iter()
            .filter_map(|m| m.first_usable_output)
            .collect();
        let resume_times: Vec<Duration> =
            successful.iter().filter_map(|m| m.resume_time).collect();
        let throughputs: Vec<f64> = successful
            .iter()
            .filter_map(|m| m.throughput_bps())
            .collect();
        let cpu_efficiencies: Vec<f64> = successful
            .iter()
            .filter_map(|m| m.cpu_efficiency())
            .collect();
        let cpu_ms_per_gib: Vec<f64> = successful
            .iter()
            .filter_map(|m| m.cpu_ms_per_gib())
            .collect();
        // Average only over iterations that reported a peak. Dividing by the
        // number of successful iterations would understate the mean whenever
        // some of them lack the measurement.
        let memory_peaks: Vec<f64> = successful
            .iter()
            .filter_map(|m| m.memory_peak.map(|bytes| bytes as f64))
            .collect();
        let disk_amplification_ratios: Vec<f64> = successful
            .iter()
            .filter_map(|m| m.disk_amplification_ratio)
            .collect();
        let bytes_on_wire: Vec<f64> = successful
            .iter()
            .filter_map(|m| m.bytes_on_wire.map(|value| value as f64))
            .collect();

        // Every iteration is either successful or failed, so the failed count
        // is the remainder.
        let failed_iterations = self.iterations.len() - successful.len();
        let reproducible_failures = self
            .iterations
            .iter()
            .filter(|m| !m.verified_completion && m.failure_reproducible == Some(true))
            .count();
        let failure_reproducibility_rate = if failed_iterations > 0 {
            reproducible_failures as f64 / failed_iterations as f64
        } else {
            1.0
        };

        AggregateStats {
            success_rate: successful.len() as f64 / self.iterations.len() as f64,
            mean_wall_time: mean_duration(&wall_times),
            median_wall_time: median_duration(&wall_times),
            std_dev_wall_time: std_dev_duration(&wall_times),
            mean_throughput: mean(&throughputs),
            median_throughput: median(&throughputs),
            std_dev_throughput: std_dev(&throughputs),
            mean_cpu_efficiency: mean(&cpu_efficiencies),
            mean_cpu_ms_per_gib: mean(&cpu_ms_per_gib),
            mean_memory_peak: mean(&memory_peaks),
            mean_bytes_on_wire: mean(&bytes_on_wire),
            mean_first_usable_output: mean_duration(&first_usable_outputs),
            mean_resume_time: mean_duration(&resume_times),
            mean_disk_amplification_ratio: mean(&disk_amplification_ratios),
            failure_reproducibility_rate,
        }
    }

    /// Returns `true` when at least one iteration is a verified completion.
    ///
    /// An empty iteration list is never successful.
    #[must_use]
    pub fn is_successful(&self) -> bool {
        self.iterations.iter().any(|m| m.verified_completion)
    }
}
/// Summary statistics computed by `BenchmarkResult::aggregate_stats`.
///
/// Timing, throughput and resource figures are computed over successful
/// iterations only. A mean with no samples is reported as `0.0` or
/// `Duration::ZERO`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AggregateStats {
/// Successful iterations divided by total iterations (`0.0..=1.0`).
pub success_rate: f64,
/// Mean wall-clock time.
pub mean_wall_time: Duration,
/// Median wall-clock time.
pub median_wall_time: Duration,
/// Sample standard deviation (n - 1 denominator) of wall-clock time.
pub std_dev_wall_time: Duration,
/// Mean throughput in bytes per second.
pub mean_throughput: f64,
/// Median throughput in bytes per second.
pub median_throughput: f64,
/// Sample standard deviation (n - 1 denominator) of throughput.
pub std_dev_throughput: f64,
/// Mean bytes transferred per CPU second.
pub mean_cpu_efficiency: f64,
/// Mean CPU milliseconds per GiB transferred.
pub mean_cpu_ms_per_gib: f64,
/// Mean peak memory across successful iterations (exposed as the
/// `mean_memory_peak_bytes` metric).
pub mean_memory_peak: f64,
/// Mean bytes on the wire.
pub mean_bytes_on_wire: f64,
/// Mean time to first usable output.
pub mean_first_usable_output: Duration,
/// Mean resume time.
pub mean_resume_time: Duration,
/// Mean disk amplification ratio.
pub mean_disk_amplification_ratio: f64,
/// Share of failed iterations flagged as reproducible.
pub failure_reproducibility_rate: f64,
}
impl AggregateStats {
    /// Builds the all-zero statistics used when no iteration succeeded.
    ///
    /// Every rate and mean is `0.0` and every duration is `Duration::ZERO`,
    /// including `failure_reproducibility_rate`.
    #[must_use]
    pub fn failed() -> Self {
        const NO_TIME: Duration = Duration::ZERO;
        const NO_VALUE: f64 = 0.0;

        Self {
            success_rate: NO_VALUE,
            mean_wall_time: NO_TIME,
            median_wall_time: NO_TIME,
            std_dev_wall_time: NO_TIME,
            mean_throughput: NO_VALUE,
            median_throughput: NO_VALUE,
            std_dev_throughput: NO_VALUE,
            mean_cpu_efficiency: NO_VALUE,
            mean_cpu_ms_per_gib: NO_VALUE,
            mean_memory_peak: NO_VALUE,
            mean_bytes_on_wire: NO_VALUE,
            mean_first_usable_output: NO_TIME,
            mean_resume_time: NO_TIME,
            mean_disk_amplification_ratio: NO_VALUE,
            failure_reproducibility_rate: NO_VALUE,
        }
    }
}
/// Complete benchmark report: raw results for baseline tools and ATP
/// profiles plus the comparison derived from them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkReport {
/// Data size and iteration count the benchmark was configured with.
pub config_summary: ConfigSummary,
/// Results for baseline tools, keyed by name.
pub baseline_results: BTreeMap<String, BenchmarkResult>,
/// Results for ATP profiles, keyed by name.
pub atp_results: BTreeMap<String, BenchmarkResult>,
/// Comparison computed from the two result maps when the report is built.
pub comparison: ComparisonReport,
/// UTC time at which the report was constructed.
pub timestamp: chrono::DateTime<chrono::Utc>,
}
impl BenchmarkReport {
#[must_use]
pub fn new(
baseline_results: BTreeMap<String, BenchmarkResult>,
atp_results: BTreeMap<String, BenchmarkResult>,
data_size: u64,
iterations: u32,
) -> Self {
let comparison = ComparisonReport::analyze(&baseline_results, &atp_results);
Self {
config_summary: ConfigSummary {
data_size,
iterations,
},
baseline_results,
atp_results,
comparison,
timestamp: chrono::Utc::now(),
}
}
#[must_use]
pub fn summary(&self) -> String {
let mut summary = String::new();
summary.push_str(&format!(
"Benchmark Report - {} bytes, {} iterations\n\n",
self.config_summary.data_size, self.config_summary.iterations
));
summary.push_str("Baseline Tools:\n");
for (name, result) in &self.baseline_results {
let stats = result.aggregate_stats();
summary.push_str(&format!(
" {}: {:.2} MB/s (success rate: {:.1}%)\n",
name,
stats.mean_throughput / 1_000_000.0,
stats.success_rate * 100.0
));
}
summary.push_str("\nATP Profiles:\n");
for (name, result) in &self.atp_results {
let stats = result.aggregate_stats();
summary.push_str(&format!(
" {}: {:.2} MB/s (success rate: {:.1}%)\n",
name,
stats.mean_throughput / 1_000_000.0,
stats.success_rate * 100.0
));
}
if let Some(best_baseline) = &self.comparison.best_baseline_performance {
if let Some(best_atp) = &self.comparison.best_atp_performance {
summary.push_str(&format!(
"\nBest Performance:\n Baseline: {} ({:.2} MB/s)\n ATP: {} ({:.2} MB/s)\n",
best_baseline.tool_name,
best_baseline.throughput / 1_000_000.0,
best_atp.tool_name,
best_atp.throughput / 1_000_000.0
));
}
}
summary
}
#[must_use]
pub fn public_regression_report(
&self,
report_id: impl Into<String>,
thresholds: &[MetricThreshold],
) -> PublicRegressionReport {
let mut rows = Vec::new();
for (name, result) in &self.baseline_results {
rows.extend(evaluate_result_thresholds(
&format!("baseline:{name}"),
result,
thresholds,
));
}
for (name, result) in &self.atp_results {
rows.extend(evaluate_result_thresholds(
&format!("atp:{name}"),
result,
thresholds,
));
}
PublicRegressionReport::new(report_id, rows)
}
}
/// Schema identifier stored in `PublicRegressionReport::schema_version`.
pub const PUBLIC_REGRESSION_REPORT_SCHEMA_VERSION: &str = "atp-l3-benchmark-gate-report-v1";
/// How an observed metric value is compared against its threshold.
///
/// Serialized in `snake_case` (for example `less_than_or_equal`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ThresholdComparison {
/// Passes when `observed <= threshold`.
LessThanOrEqual,
/// Passes when `observed >= threshold`.
GreaterThanOrEqual,
/// Passes when the observed value equals the threshold, up to a
/// floating-point tolerance.
Equals,
}
impl ThresholdComparison {
    /// Returns `true` when `observed` satisfies this comparison against
    /// `threshold`.
    ///
    /// `Equals` accepts exact equality (including matching infinities) and,
    /// for finite values, a difference of at most `f64::EPSILON` scaled by
    /// the larger magnitude, never below `f64::EPSILON` itself. A fixed
    /// absolute epsilon is smaller than one ULP for any magnitude above 2.0
    /// and would reject values that differ only by rounding. Any comparison
    /// involving NaN is `false`.
    fn evaluate(self, observed: f64, threshold: f64) -> bool {
        match self {
            Self::LessThanOrEqual => observed <= threshold,
            Self::GreaterThanOrEqual => observed >= threshold,
            Self::Equals => {
                if observed == threshold {
                    return true;
                }
                // A non-finite operand would make the scaled tolerance
                // infinite and accept anything, so only exact equality
                // (handled above) may match there.
                if !(observed.is_finite() && threshold.is_finite()) {
                    return false;
                }
                let scale = observed.abs().max(threshold.abs()).max(1.0);
                (observed - threshold).abs() <= f64::EPSILON * scale
            }
        }
    }
}
/// One gate definition: a metric, how to compare it, and the limit.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricThreshold {
/// Identifier of the gate; copied into each resulting row.
pub gate_id: String,
/// Metric name looked up via `metric_value` (for example `mean_wall_time_ms`).
pub metric: String,
/// Unit label copied into each resulting row; not interpreted by this module.
pub unit: String,
/// Comparison applied between the observed value and `threshold`.
pub comparison: ThresholdComparison,
/// Limit the observed value is compared against.
pub threshold: f64,
/// Whether the gate is required; copied into each resulting row.
pub required: bool,
}
impl MetricThreshold {
    /// Creates a threshold whose `required` flag is set to `true`.
    ///
    /// The three string-like arguments are converted into owned `String`s.
    #[must_use]
    pub fn required(
        gate_id: impl Into<String>,
        metric: impl Into<String>,
        unit: impl Into<String>,
        comparison: ThresholdComparison,
        threshold: f64,
    ) -> Self {
        let gate_id: String = gate_id.into();
        let metric: String = metric.into();
        let unit: String = unit.into();
        Self {
            gate_id,
            metric,
            unit,
            comparison,
            threshold,
            required: true,
        }
    }
}
/// Outcome of evaluating one threshold for one profile.
///
/// Serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BenchmarkGateStatus {
/// The observed value satisfied the comparison.
Pass,
/// The observed value did not satisfy the comparison.
Fail,
/// The gate was not evaluated; see `BenchmarkGateRow::skip_reason`.
Skipped,
}
/// Why a gate row was skipped instead of evaluated.
///
/// Serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BenchmarkSkipReason {
/// The result contained no iterations at all.
NoIterations,
/// The result had iterations but none was a verified completion.
NoSuccessfulIterations,
/// No value could be obtained for the threshold's metric (for example,
/// an unrecognised metric name).
MetricUnavailable,
}
/// One evaluated (or skipped) gate for one profile.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkGateRow {
/// Gate identifier copied from the threshold.
pub gate_id: String,
/// Profile label, e.g. `baseline:<name>` or `atp:<name>`.
pub profile: String,
/// Metric name copied from the threshold.
pub metric: String,
/// Unit label copied from the threshold.
pub unit: String,
/// Observed metric value; `None` for skipped rows.
pub observed: Option<f64>,
/// Limit copied from the threshold.
pub threshold: f64,
/// Comparison copied from the threshold.
pub comparison: ThresholdComparison,
/// Required flag copied from the threshold.
pub required: bool,
/// Whether the gate passed, failed, or was skipped.
pub status: BenchmarkGateStatus,
/// Set only when `status` is `Skipped`.
pub skip_reason: Option<BenchmarkSkipReason>,
/// Command-line string of the form
/// `asupersync atp bench --profile <profile> --json`.
pub replay_pointer: String,
}
/// Serializable gate report: all rows plus a one-line human summary.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PublicRegressionReport {
/// Always `PUBLIC_REGRESSION_REPORT_SCHEMA_VERSION` for reports built here.
pub schema_version: String,
/// Caller-supplied identifier of this report.
pub report_id: String,
/// One row per (profile, threshold) pair.
pub rows: Vec<BenchmarkGateRow>,
/// Text summary with the passed / failed / skipped counts.
pub human_summary: String,
}
impl PublicRegressionReport {
    /// Wraps `rows` in a report and derives the human-readable summary.
    ///
    /// The summary states how many rows passed, failed and were skipped.
    fn new(report_id: impl Into<String>, rows: Vec<BenchmarkGateRow>) -> Self {
        let report_id = report_id.into();

        // Tally all three outcomes in a single pass over the rows.
        let mut passed = 0_usize;
        let mut failed = 0_usize;
        let mut skipped = 0_usize;
        for row in &rows {
            if row.status == BenchmarkGateStatus::Pass {
                passed += 1;
            } else if row.status == BenchmarkGateStatus::Fail {
                failed += 1;
            } else if row.status == BenchmarkGateStatus::Skipped {
                skipped += 1;
            }
        }

        let human_summary = format!(
            "ATP-L3 public regression report {report_id}: {passed} passed, {failed} failed, {skipped} skipped"
        );
        let schema_version = PUBLIC_REGRESSION_REPORT_SCHEMA_VERSION.to_string();
        Self {
            schema_version,
            report_id,
            rows,
            human_summary,
        }
    }
}
/// Benchmark configuration echoed in the report header.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConfigSummary {
/// Data size; `BenchmarkReport::summary` prints it as a byte count.
pub data_size: u64,
/// Configured number of iterations.
pub iterations: u32,
}
/// Comparison of baseline tools against ATP profiles.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComparisonReport {
/// Successful baseline tool with the highest mean throughput, if any.
pub best_baseline_performance: Option<PerformanceSummary>,
/// Successful ATP profile with the highest mean throughput, if any.
pub best_atp_performance: Option<PerformanceSummary>,
/// One entry per (successful baseline, successful ATP profile) pair.
pub performance_ratios: Vec<PerformanceRatio>,
/// Human-readable one-line verdict.
pub assessment: String,
}
impl ComparisonReport {
#[must_use]
pub fn analyze(
baseline_results: &BTreeMap<String, BenchmarkResult>,
atp_results: &BTreeMap<String, BenchmarkResult>,
) -> Self {
let best_baseline = baseline_results
.iter()
.filter(|(_, result)| result.is_successful())
.map(|(name, result)| {
let stats = result.aggregate_stats();
PerformanceSummary {
tool_name: name.clone(),
throughput: stats.mean_throughput,
wall_time: stats.mean_wall_time,
success_rate: stats.success_rate,
}
})
.max_by(|a, b| {
a.throughput
.partial_cmp(&b.throughput)
.unwrap_or(std::cmp::Ordering::Equal)
});
let best_atp = atp_results
.iter()
.filter(|(_, result)| result.is_successful())
.map(|(name, result)| {
let stats = result.aggregate_stats();
PerformanceSummary {
tool_name: name.clone(),
throughput: stats.mean_throughput,
wall_time: stats.mean_wall_time,
success_rate: stats.success_rate,
}
})
.max_by(|a, b| {
a.throughput
.partial_cmp(&b.throughput)
.unwrap_or(std::cmp::Ordering::Equal)
});
let performance_ratios = Self::calculate_ratios(baseline_results, atp_results);
let assessment = Self::generate_assessment(&best_baseline, &best_atp, &performance_ratios);
Self {
best_baseline_performance: best_baseline,
best_atp_performance: best_atp,
performance_ratios,
assessment,
}
}
fn calculate_ratios(
baseline_results: &BTreeMap<String, BenchmarkResult>,
atp_results: &BTreeMap<String, BenchmarkResult>,
) -> Vec<PerformanceRatio> {
let mut ratios = Vec::new();
for (baseline_name, baseline_result) in baseline_results {
if !baseline_result.is_successful() {
continue;
}
let baseline_stats = baseline_result.aggregate_stats();
for (atp_name, atp_result) in atp_results {
if !atp_result.is_successful() {
continue;
}
let atp_stats = atp_result.aggregate_stats();
let throughput_ratio = if baseline_stats.mean_throughput > 0.0 {
atp_stats.mean_throughput / baseline_stats.mean_throughput
} else {
0.0
};
let time_ratio = if baseline_stats.mean_wall_time.as_secs_f64() > 0.0 {
atp_stats.mean_wall_time.as_secs_f64()
/ baseline_stats.mean_wall_time.as_secs_f64()
} else {
0.0
};
ratios.push(PerformanceRatio {
baseline_tool: baseline_name.clone(),
atp_profile: atp_name.clone(),
throughput_ratio,
time_ratio,
});
}
}
ratios
}
fn generate_assessment(
best_baseline: &Option<PerformanceSummary>,
best_atp: &Option<PerformanceSummary>,
ratios: &[PerformanceRatio],
) -> String {
let comparison_count = ratios.len();
match (best_baseline, best_atp) {
(Some(baseline), Some(atp)) => {
let ratio = atp.throughput / baseline.throughput;
if ratio >= 1.1 {
format!(
"ATP outperforms baseline by {:.1}x across {} comparison(s)",
ratio, comparison_count
)
} else if ratio >= 0.9 {
format!(
"ATP performance is comparable to baseline across {comparison_count} comparison(s)"
)
} else {
format!(
"ATP underperforms baseline by {:.1}x across {} comparison(s)",
1.0 / ratio,
comparison_count
)
}
}
(None, Some(_)) => "ATP succeeded where baseline tools failed".to_string(),
(Some(_), None) => "Baseline tools succeeded but ATP failed".to_string(),
(None, None) => "Both baseline and ATP failed".to_string(),
}
}
}
/// Headline figures for one tool or profile.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceSummary {
/// Key of the entry in its result map.
pub tool_name: String,
/// Mean throughput in bytes per second.
pub throughput: f64,
/// Mean wall-clock time.
pub wall_time: Duration,
/// Successful iterations divided by total iterations.
pub success_rate: f64,
}
/// Pairwise comparison of one ATP profile against one baseline tool.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceRatio {
/// Name of the baseline tool.
pub baseline_tool: String,
/// Name of the ATP profile.
pub atp_profile: String,
/// ATP mean throughput divided by baseline mean throughput
/// (`0.0` when the baseline throughput is zero).
pub throughput_ratio: f64,
/// ATP mean wall time divided by baseline mean wall time
/// (`0.0` when the baseline wall time is zero).
pub time_ratio: f64,
}
fn evaluate_result_thresholds(
profile: &str,
result: &BenchmarkResult,
thresholds: &[MetricThreshold],
) -> Vec<BenchmarkGateRow> {
let replay_pointer = format!("asupersync atp bench --profile {profile} --json");
if result.iterations.is_empty() {
return thresholds
.iter()
.map(|threshold| skipped_row(profile, threshold, BenchmarkSkipReason::NoIterations))
.collect();
}
if !result.is_successful() {
return thresholds
.iter()
.map(|threshold| {
skipped_row(
profile,
threshold,
BenchmarkSkipReason::NoSuccessfulIterations,
)
})
.collect();
}
let stats = result.aggregate_stats();
thresholds
.iter()
.map(|threshold| match metric_value(&stats, &threshold.metric) {
Some(observed) => BenchmarkGateRow {
gate_id: threshold.gate_id.clone(),
profile: profile.to_string(),
metric: threshold.metric.clone(),
unit: threshold.unit.clone(),
observed: Some(observed),
threshold: threshold.threshold,
comparison: threshold.comparison,
required: threshold.required,
status: if threshold.comparison.evaluate(observed, threshold.threshold) {
BenchmarkGateStatus::Pass
} else {
BenchmarkGateStatus::Fail
},
skip_reason: None,
replay_pointer: replay_pointer.clone(),
},
None => skipped_row(profile, threshold, BenchmarkSkipReason::MetricUnavailable),
})
.collect()
}
fn skipped_row(
profile: &str,
threshold: &MetricThreshold,
reason: BenchmarkSkipReason,
) -> BenchmarkGateRow {
BenchmarkGateRow {
gate_id: threshold.gate_id.clone(),
profile: profile.to_string(),
metric: threshold.metric.clone(),
unit: threshold.unit.clone(),
observed: None,
threshold: threshold.threshold,
comparison: threshold.comparison,
required: threshold.required,
status: BenchmarkGateStatus::Skipped,
skip_reason: Some(reason),
replay_pointer: format!("asupersync atp bench --profile {profile} --json"),
}
}
fn metric_value(stats: &AggregateStats, metric: &str) -> Option<f64> {
Some(match metric {
"success_rate" | "verified_completion_rate" => stats.success_rate,
"mean_wall_time_ms" => stats.mean_wall_time.as_secs_f64() * 1000.0,
"median_wall_time_ms" => stats.median_wall_time.as_secs_f64() * 1000.0,
"mean_throughput_bps" => stats.mean_throughput,
"median_throughput_bps" => stats.median_throughput,
"mean_cpu_efficiency_bps" => stats.mean_cpu_efficiency,
"mean_cpu_ms_per_gib" => stats.mean_cpu_ms_per_gib,
"mean_memory_peak_bytes" => stats.mean_memory_peak,
"mean_bytes_on_wire" => stats.mean_bytes_on_wire,
"mean_first_usable_output_ms" => stats.mean_first_usable_output.as_secs_f64() * 1000.0,
"mean_resume_time_ms" => stats.mean_resume_time.as_secs_f64() * 1000.0,
"mean_disk_amplification_ratio" => stats.mean_disk_amplification_ratio,
"failure_reproducibility_rate" => stats.failure_reproducibility_rate,
_ => return None,
})
}
/// Arithmetic mean of `values`; an empty slice yields `0.0` instead of NaN.
fn mean(values: &[f64]) -> f64 {
    match values.len() {
        0 => 0.0,
        count => {
            let total: f64 = values.iter().sum();
            total / count as f64
        }
    }
}
/// Median of `values`; an empty slice yields `0.0`.
///
/// For an even number of samples the two middle values are averaged.
/// Sorting uses `f64::total_cmp`, which is a total order even when NaN is
/// present (positive NaN sorts after every number). A comparator built from
/// `partial_cmp(..).unwrap_or(Equal)` is not a total order with NaN in the
/// input, and the slice sort methods may panic or produce an unspecified
/// order in that case.
fn median(values: &[f64]) -> f64 {
    if values.is_empty() {
        return 0.0;
    }
    let mut sorted = values.to_vec();
    sorted.sort_unstable_by(f64::total_cmp);
    let mid = sorted.len() / 2;
    if sorted.len() % 2 == 1 {
        sorted[mid]
    } else {
        (sorted[mid - 1] + sorted[mid]) / 2.0
    }
}
/// Sample standard deviation (n - 1 denominator) of `values`.
///
/// Returns `0.0` when fewer than two samples are supplied.
fn std_dev(values: &[f64]) -> f64 {
    let count = values.len();
    if count < 2 {
        return 0.0;
    }
    let center = mean(values);
    let squared_deviations: f64 = values
        .iter()
        .map(|value| (value - center).powi(2))
        .sum();
    (squared_deviations / (count - 1) as f64).sqrt()
}
/// Mean of `durations`, truncated to whole nanoseconds.
///
/// An empty slice yields `Duration::ZERO`. The sum is accumulated in `u128`,
/// so no nanosecond count is truncated and the total cannot overflow the way
/// a `u64` accumulator does once it exceeds roughly 584 years.
fn mean_duration(durations: &[Duration]) -> Duration {
    const NANOS_PER_SEC: u128 = 1_000_000_000;

    if durations.is_empty() {
        return Duration::ZERO;
    }
    let total_nanos: u128 = durations.iter().map(Duration::as_nanos).sum();
    let mean_nanos = total_nanos / durations.len() as u128;
    // The mean never exceeds the largest input, so the whole-second part fits
    // in `u64`; the remainder is below one billion and fits in `u32`.
    Duration::new(
        (mean_nanos / NANOS_PER_SEC) as u64,
        (mean_nanos % NANOS_PER_SEC) as u32,
    )
}
/// Median of `durations`; an empty slice yields `Duration::ZERO`.
///
/// For an even number of samples the two middle values are averaged.
fn median_duration(durations: &[Duration]) -> Duration {
    if durations.is_empty() {
        return Duration::ZERO;
    }
    let mut ordered: Vec<Duration> = durations.to_vec();
    ordered.sort_unstable();
    let upper = ordered.len() / 2;
    match ordered.len() % 2 {
        1 => ordered[upper],
        _ => (ordered[upper - 1] + ordered[upper]) / 2,
    }
}
/// Sample standard deviation (n - 1 denominator) of `durations`.
///
/// Returns `Duration::ZERO` when fewer than two samples are supplied. The
/// computation runs in `f64` nanoseconds and the result is truncated to
/// whole nanoseconds.
fn std_dev_duration(durations: &[Duration]) -> Duration {
    let count = durations.len();
    if count < 2 {
        return Duration::ZERO;
    }
    let center = mean_duration(durations).as_nanos() as f64;
    let squared_deviations: f64 = durations
        .iter()
        .map(|sample| {
            let delta = sample.as_nanos() as f64 - center;
            delta.powi(2)
        })
        .sum();
    let variance = squared_deviations / (count - 1) as f64;
    Duration::from_nanos(variance.sqrt() as u64)
}
// Unit tests for metric helpers, aggregation, and the public regression report.
#[cfg(test)]
mod tests {
use super::*;
// A verified 2 MB transfer over 2 s must report 1 MB/s.
#[test]
fn benchmark_metrics_calculates_throughput() {
let metrics = BenchmarkMetrics {
wall_time: Duration::from_secs(2),
bytes_transferred: 2_000_000,
verified_completion: true,
cpu_time: None,
memory_peak: None,
bytes_on_wire: None,
first_usable_output: None,
resume_time: None,
disk_amplification_ratio: Some(1.0),
failure_reproducible: None,
failure_mode: None,
};
let throughput = metrics.throughput_bps().unwrap();
assert_eq!(throughput, 1_000_000.0); }
// With no iterations the aggregate falls back to the all-zero "failed" stats.
#[test]
fn aggregate_stats_handles_empty_iterations() {
let result = BenchmarkResult {
tool_name: "test".to_string(),
iterations: vec![],
environment: BenchmarkEnvironment::collect().unwrap(),
};
let stats = result.aggregate_stats();
assert_eq!(stats.success_rate, 0.0);
}
// Sanity check of mean, median and sample standard deviation on 1..=5.
#[test]
fn statistical_functions_work() {
let values = vec![1.0, 2.0, 3.0, 4.0, 5.0];
assert_eq!(mean(&values), 3.0);
assert_eq!(median(&values), 3.0);
// The sample standard deviation of 1..=5 is sqrt(2.5), about 1.58.
assert!((std_dev(&values) - 1.58).abs() < 0.1);
}
// A single successful ATP iteration must pass all three gates, and the
// report must serialize with the metric name and the profile label.
#[test]
fn public_regression_report_evaluates_thresholds_and_serializes() {
let environment = BenchmarkEnvironment::collect().unwrap();
let result = BenchmarkResult {
tool_name: "atp-clean-lan".to_string(),
iterations: vec![BenchmarkMetrics {
wall_time: Duration::from_millis(200),
cpu_time: Some(Duration::from_millis(4)),
memory_peak: Some(32 * 1024 * 1024),
bytes_transferred: 1024 * 1024,
bytes_on_wire: Some(1024 * 1024),
verified_completion: true,
first_usable_output: Some(Duration::from_millis(75)),
resume_time: Some(Duration::from_millis(120)),
disk_amplification_ratio: Some(1.0),
failure_reproducible: None,
failure_mode: None,
}],
environment,
};
let mut atp_results = BTreeMap::new();
atp_results.insert("clean-lan".to_string(), result);
let report = BenchmarkReport::new(BTreeMap::new(), atp_results, 1024 * 1024, 1);
let public = report.public_regression_report(
"smoke",
&[
MetricThreshold::required(
"first-usable",
"mean_first_usable_output_ms",
"ms",
ThresholdComparison::LessThanOrEqual,
100.0,
),
MetricThreshold::required(
"disk-amp",
"mean_disk_amplification_ratio",
"ratio",
ThresholdComparison::LessThanOrEqual,
1.0,
),
MetricThreshold::required(
"cpu",
"mean_cpu_ms_per_gib",
"ms/GiB",
ThresholdComparison::LessThanOrEqual,
5000.0,
),
],
);
assert_eq!(
public.schema_version,
PUBLIC_REGRESSION_REPORT_SCHEMA_VERSION
);
assert_eq!(public.rows.len(), 3);
assert!(
public
.rows
.iter()
.all(|row| row.status == BenchmarkGateStatus::Pass)
);
assert!(public.human_summary.contains("3 passed"));
let encoded = serde_json::to_string(&public).expect("public report must serialize");
assert!(encoded.contains("mean_first_usable_output_ms"));
assert!(encoded.contains("atp:clean-lan"));
}
// A baseline whose only iteration failed must yield a skipped row with the
// `NoSuccessfulIterations` reason rather than a pass or fail.
#[test]
fn public_regression_report_classifies_skipped_rows() {
let environment = BenchmarkEnvironment::collect().unwrap();
let failed_result = BenchmarkResult {
tool_name: "scp".to_string(),
iterations: vec![BenchmarkMetrics {
wall_time: Duration::from_millis(200),
cpu_time: None,
memory_peak: None,
bytes_transferred: 0,
bytes_on_wire: None,
verified_completion: false,
first_usable_output: None,
resume_time: None,
disk_amplification_ratio: None,
failure_reproducible: Some(true),
failure_mode: Some("tool unavailable".to_string()),
}],
environment,
};
let mut baseline_results = BTreeMap::new();
baseline_results.insert("scp".to_string(), failed_result);
let report = BenchmarkReport::new(baseline_results, BTreeMap::new(), 1024 * 1024, 1);
let public = report.public_regression_report(
"skip-smoke",
&[MetricThreshold::required(
"success-rate",
"verified_completion_rate",
"ratio",
ThresholdComparison::Equals,
1.0,
)],
);
assert_eq!(public.rows.len(), 1);
assert_eq!(public.rows[0].status, BenchmarkGateStatus::Skipped);
assert_eq!(
public.rows[0].skip_reason,
Some(BenchmarkSkipReason::NoSuccessfulIterations)
);
assert!(public.human_summary.contains("1 skipped"));
}
}