use crate::domain::compute_stats;
use perfgate_types::{
AGGREGATE_SCHEMA_V1, AggregateInput, AggregateReceipt, AggregateRunnerMeta, AggregateVerdict,
AggregateWeightMode, AggregationPolicy, FailIfNOfM, HostInfo, MetricStatus, RunMeta,
RunReceipt,
};
use std::collections::{BTreeMap, HashSet};
use std::path::PathBuf;
/// Wall_ms variance floor used when the request does not supply one; keeps
/// inverse-variance weights from blowing up on near-zero variances.
const DEFAULT_VARIANCE_FLOOR: f64 = 1.0;
/// A runner is flagged as a variance outlier when its wall_ms variance
/// exceeds the (floored) peer median variance by this ratio.
const OUTLIER_VARIANCE_RATIO: f64 = 4.0;
/// Minimum measured (non-warmup) samples per runner before inverse-variance
/// weights are considered stable; fewer than this triggers a warning.
const MIN_INVERSE_VARIANCE_SAMPLE_COUNT: u32 = 5;
/// Parameters for a single aggregation run.
pub struct AggregateRequest {
    /// Paths to the run-receipt JSON files to aggregate.
    pub files: Vec<PathBuf>,
    /// How per-runner pass/fail statuses combine into the final verdict.
    pub policy: AggregationPolicy,
    /// Pass ratio required by the `Weighted`/`Quorum` policies; defaults to
    /// 0.5 when `None`.
    pub quorum: Option<f64>,
    /// Threshold configuration for the `FailIfNOfM` policy.
    pub fail_if: Option<FailIfNOfM>,
    /// Configured weight per runner label ("{os}-{arch}"); absent labels
    /// default to 1.0.
    pub weights: BTreeMap<String, f64>,
    /// Whether weights are used as configured or scaled by inverse
    /// wall_ms variance.
    pub weight_mode: AggregateWeightMode,
    /// Lower bound on the variance used in inverse-variance weighting;
    /// defaults to `DEFAULT_VARIANCE_FLOOR`.
    pub variance_floor: Option<f64>,
    /// Optional runner class recorded on every aggregate input.
    pub runner_class: Option<String>,
    /// Optional lane recorded on every aggregate input.
    pub lane: Option<String>,
}
/// Result of aggregation.
pub struct AggregateOutcome {
    /// Aggregate-level inputs, warnings, and policy verdict.
    pub aggregate: AggregateReceipt,
    /// Synthetic combined `RunReceipt` whose samples and stats span every
    /// input receipt.
    pub receipt: RunReceipt,
}
/// Stateless use case that merges multiple run receipts into one aggregate verdict.
pub struct AggregateUseCase;
/// Per-runner weighting data derived from one receipt's measured samples.
struct RunnerWeightProfile {
    // Number of non-warmup samples in the receipt.
    sample_count: u32,
    // Unbiased sample variance of wall_ms; `None` with fewer than 2 samples.
    wall_ms_variance: Option<f64>,
    // Weight used by the weighted policy; `None` unless policy is `Weighted`.
    effective_weight: Option<f64>,
    // Set when this runner's variance exceeds the peer-median outlier threshold.
    outlier_reason: Option<String>,
}
impl AggregateUseCase {
    /// Aggregates several run receipts for the same benchmark into one
    /// combined `RunReceipt` (all samples merged, stats recomputed) and an
    /// `AggregateReceipt` carrying per-runner inputs, warnings, and the
    /// policy verdict.
    ///
    /// # Errors
    /// Bails when no files are supplied, a file fails to read/parse, a run
    /// id appears more than once, receipts name different benchmarks, or
    /// stat recomputation fails.
    pub fn execute(&self, req: AggregateRequest) -> anyhow::Result<AggregateOutcome> {
        if req.files.is_empty() {
            anyhow::bail!("No files provided for aggregation");
        }
        let mut receipts = Vec::new();
        let mut sources = Vec::new();
        let mut seen_run_ids = HashSet::new();
        for file in &req.files {
            let receipt: RunReceipt = perfgate_types::read_json_file(file)?;
            // Reject duplicate run ids so a run cannot be counted twice.
            if !seen_run_ids.insert(receipt.run.id.clone()) {
                anyhow::bail!(
                    "duplicate run id detected during aggregation: {}",
                    receipt.run.id
                );
            }
            sources.push(file.display().to_string());
            receipts.push(receipt);
        }
        // Every receipt must describe the same benchmark name.
        let first_bench_name = &receipts[0].bench.name;
        for r in &receipts {
            if &r.bench.name != first_bench_name {
                anyhow::bail!(
                    "Cannot aggregate receipts for different benchmarks: {} vs {}",
                    first_bench_name,
                    r.bench.name
                );
            }
        }
        // Merge all samples (warmups included) and recompute stats over the
        // combined set.
        let mut combined_samples = Vec::new();
        for r in &receipts {
            combined_samples.extend(r.samples.clone());
        }
        let work_units = receipts[0].bench.work_units;
        let stats = compute_stats(&combined_samples, work_units)?;
        let variance_floor = req.variance_floor.unwrap_or(DEFAULT_VARIANCE_FLOOR);
        let runner_profiles = build_runner_weight_profiles(&receipts, &req, variance_floor);
        // Synthetic bench metadata mirrors the first receipt, with `repeat`
        // widened to the total combined sample count.
        let mut bench = receipts[0].bench.clone();
        bench.repeat = combined_samples.len() as u32;
        let receipt = RunReceipt {
            schema: perfgate_types::RUN_SCHEMA_V1.to_string(),
            tool: receipts[0].tool.clone(),
            run: RunMeta {
                id: uuid::Uuid::new_v4().to_string(),
                // The run window spans from the first receipt's start to the
                // last receipt's end, in input order (not sorted by time).
                started_at: receipts[0].run.started_at.clone(),
                ended_at: receipts
                    .last()
                    .ok_or_else(|| anyhow::anyhow!("no receipts after aggregation"))?
                    .run
                    .ended_at
                    .clone(),
                // Placeholder host: the combined receipt may span machines.
                host: HostInfo {
                    os: "fleet".to_string(),
                    arch: "mixed".to_string(),
                    cpu_count: None,
                    memory_bytes: None,
                    hostname_hash: None,
                },
            },
            bench,
            samples: combined_samples,
            stats,
        };
        // One AggregateInput per receipt, labelled "{os}-{arch}" and carrying
        // the weighting profile computed above.
        let mut inputs = Vec::with_capacity(receipts.len());
        for (idx, r) in receipts.iter().enumerate() {
            let label = format!("{}-{}", r.run.host.os, r.run.host.arch);
            let status = input_status(r);
            let profile = &runner_profiles[idx];
            inputs.push(AggregateInput {
                source: sources[idx].clone(),
                run_id: r.run.id.clone(),
                bench_name: r.bench.name.clone(),
                host: r.run.host.clone(),
                runner: AggregateRunnerMeta {
                    label: label.clone(),
                    class: req.runner_class.clone(),
                    lane: req.lane.clone(),
                    weight: req.weights.get(&label).copied(),
                    sample_count: Some(profile.sample_count),
                    wall_ms_variance: profile.wall_ms_variance,
                    effective_weight: profile.effective_weight,
                    outlier_reason: profile.outlier_reason.clone(),
                },
                status,
                reasons: input_reasons(r),
            });
        }
        let mut warnings = host_mismatch_warnings(&receipts);
        // Variance outliers are surfaced regardless of weight mode.
        let outlier_runners = runner_profiles
            .iter()
            .filter(|profile| profile.outlier_reason.is_some())
            .count();
        if outlier_runners > 0 {
            warnings.push(format!(
                "{outlier_runners} runner(s) flagged as wall_ms variance outliers"
            ));
        }
        // Inverse-variance weighting needs enough samples per runner to be
        // trustworthy; warn when estimates are thin or missing entirely.
        if matches!(req.weight_mode, AggregateWeightMode::InverseVariance) {
            let low_sample_runners = runner_profiles
                .iter()
                .filter(|profile| profile.sample_count < MIN_INVERSE_VARIANCE_SAMPLE_COUNT)
                .count();
            if low_sample_runners > 0 {
                warnings.push(format!(
                    "{low_sample_runners} runner(s) have fewer than {MIN_INVERSE_VARIANCE_SAMPLE_COUNT} measured sample(s); inverse-variance weights may be unstable"
                ));
            }
            let missing_variance_runners = runner_profiles
                .iter()
                .filter(|profile| profile.wall_ms_variance.is_none())
                .count();
            if missing_variance_runners > 0 {
                warnings.push(format!(
                    "{missing_variance_runners} runner(s) do not have enough wall_ms samples to estimate variance; using variance floor"
                ));
            }
        }
        let verdict = evaluate_policy(&inputs, &req);
        let aggregate = AggregateReceipt {
            schema: AGGREGATE_SCHEMA_V1.to_string(),
            tool: receipts[0].tool.clone(),
            run: receipt.run.clone(),
            benchmark: first_bench_name.clone(),
            policy: req.policy,
            quorum: req.quorum,
            fail_if: req.fail_if,
            weight_mode: req.weight_mode,
            weights: req.weights,
            // The floor is only meaningful under inverse-variance weighting.
            variance_floor: matches!(req.weight_mode, AggregateWeightMode::InverseVariance)
                .then_some(variance_floor),
            inputs,
            verdict,
            warnings,
        };
        Ok(AggregateOutcome { aggregate, receipt })
    }
}
/// Pass/fail status for a single receipt: any measured (non-warmup) sample
/// that exited non-zero or timed out fails the whole input.
fn input_status(receipt: &RunReceipt) -> MetricStatus {
    let has_bad_sample = receipt
        .samples
        .iter()
        .any(|sample| !sample.warmup && (sample.exit_code != 0 || sample.timed_out));
    match has_bad_sample {
        true => MetricStatus::Fail,
        false => MetricStatus::Pass,
    }
}
/// Human-readable failure reasons for one receipt, derived from its
/// measured (non-warmup) samples. Returns an empty vector when clean.
fn input_reasons(receipt: &RunReceipt) -> Vec<String> {
    let mut failed = 0usize;
    let mut timed_out = 0usize;
    // Single pass over the measured samples, tallying both failure kinds.
    for sample in receipt.samples.iter().filter(|s| !s.warmup) {
        if sample.exit_code != 0 {
            failed += 1;
        }
        if sample.timed_out {
            timed_out += 1;
        }
    }
    let mut reasons = Vec::new();
    if failed > 0 {
        reasons.push(format!("{failed} sample(s) had non-zero exit codes"));
    }
    if timed_out > 0 {
        reasons.push(format!("{timed_out} sample(s) timed out"));
    }
    reasons
}
/// Compares every receipt's host against the first receipt's host and
/// returns a sorted, deduplicated list of mismatch warnings.
fn host_mismatch_warnings(receipts: &[RunReceipt]) -> Vec<String> {
    let mut warnings: Vec<String> = match receipts.split_first() {
        None => return Vec::new(),
        Some((first, rest)) => rest
            .iter()
            .flat_map(|receipt| compare_hosts(&first.run.host, &receipt.run.host))
            .collect(),
    };
    warnings.sort();
    warnings.dedup();
    warnings
}
fn compare_hosts(a: &HostInfo, b: &HostInfo) -> Vec<String> {
let mut reasons = Vec::new();
if a.os != b.os {
reasons.push(format!("host os mismatch: {} vs {}", a.os, b.os));
}
if a.arch != b.arch {
reasons.push(format!("host arch mismatch: {} vs {}", a.arch, b.arch));
}
if let (Some(ca), Some(cb)) = (a.cpu_count, b.cpu_count)
&& (ca > cb.saturating_mul(2) || cb > ca.saturating_mul(2))
{
reasons.push(format!(
"host cpu_count differs significantly: {} vs {}",
ca, cb
));
}
if let (Some(ma), Some(mb)) = (a.memory_bytes, b.memory_bytes)
&& (ma > mb.saturating_mul(2) || mb > ma.saturating_mul(2))
{
reasons.push(format!(
"host memory_bytes differs significantly: {} vs {}",
ma, mb
));
}
if let (Some(ha), Some(hb)) = (&a.hostname_hash, &b.hostname_hash)
&& ha != hb
{
reasons.push("host hostname_hash mismatch".to_string());
}
reasons
}
/// Builds one `RunnerWeightProfile` per receipt: measured-sample count,
/// wall_ms variance, the effective weight under the requested policy/mode,
/// and (inverse-variance mode only) an outlier annotation for runners whose
/// variance exceeds the peer median by `OUTLIER_VARIANCE_RATIO`.
fn build_runner_weight_profiles(
    receipts: &[RunReceipt],
    req: &AggregateRequest,
    variance_floor: f64,
) -> Vec<RunnerWeightProfile> {
    // Never divide by zero, even if the caller passed a non-positive floor.
    let normalized_variance_floor = variance_floor.max(f64::EPSILON);
    let mut profiles: Vec<_> = receipts
        .iter()
        .map(|receipt| {
            let (sample_count, wall_ms_variance) = wall_ms_variance(receipt);
            let label = format!("{}-{}", receipt.run.host.os, receipt.run.host.arch);
            let configured_weight = req.weights.get(&label).copied().unwrap_or(1.0);
            // Effective weights only exist under the Weighted policy.
            let effective_weight = matches!(req.policy, AggregationPolicy::Weighted).then_some(
                match req.weight_mode {
                    AggregateWeightMode::Configured => configured_weight,
                    AggregateWeightMode::InverseVariance => {
                        // Noisier runners get proportionally less weight; the
                        // floor both bounds very small variances and stands in
                        // for a missing variance estimate.
                        configured_weight
                            / wall_ms_variance
                                .unwrap_or(normalized_variance_floor)
                                .max(normalized_variance_floor)
                    }
                },
            );
            RunnerWeightProfile {
                sample_count,
                wall_ms_variance,
                effective_weight,
                outlier_reason: None,
            }
        })
        .collect();
    // Outlier pass: flag runners whose variance exceeds the floored peer
    // median scaled by the outlier ratio. Only meaningful in
    // inverse-variance mode and when at least one variance was estimable.
    if matches!(req.weight_mode, AggregateWeightMode::InverseVariance)
        && let Some(median_variance) = median(
            &profiles
                .iter()
                .filter_map(|profile| profile.wall_ms_variance)
                .collect::<Vec<_>>(),
        )
    {
        let threshold = median_variance.max(normalized_variance_floor) * OUTLIER_VARIANCE_RATIO;
        for profile in &mut profiles {
            if let Some(observed_variance) = profile.wall_ms_variance
                && observed_variance > threshold
            {
                profile.outlier_reason = Some(format!(
                    "wall_ms variance {:.3} exceeds peer median {:.3}",
                    observed_variance, median_variance
                ));
            }
        }
    }
    profiles
}
/// Measured-sample count and the unbiased (n - 1 denominator) sample
/// variance of wall_ms for one receipt. Variance is `None` when fewer than
/// two non-warmup samples exist.
fn wall_ms_variance(receipt: &RunReceipt) -> (u32, Option<f64>) {
    let measured: Vec<f64> = receipt
        .samples
        .iter()
        .filter_map(|sample| (!sample.warmup).then_some(sample.wall_ms as f64))
        .collect();
    let n = measured.len();
    let sample_count = n as u32;
    if n < 2 {
        return (sample_count, None);
    }
    let mean = measured.iter().sum::<f64>() / n as f64;
    let sum_sq_dev: f64 = measured.iter().map(|v| (v - mean) * (v - mean)).sum();
    (sample_count, Some(sum_sq_dev / (n - 1) as f64))
}
/// Median of an unsorted slice; `None` for an empty slice. Even-length
/// input averages the two middle elements. Uses `total_cmp` so NaN values
/// sort deterministically instead of panicking.
fn median(values: &[f64]) -> Option<f64> {
    let mut sorted = values.to_vec();
    sorted.sort_by(|a, b| a.total_cmp(b));
    match sorted.len() {
        0 => None,
        len if len % 2 == 1 => Some(sorted[len / 2]),
        len => Some((sorted[len / 2 - 1] + sorted[len / 2]) / 2.0),
    }
}
/// Applies the requested aggregation policy to the per-runner statuses and
/// produces the final verdict, including weighted tallies, the quorum used
/// (when applicable), the outlier count, and human-readable failure reasons.
fn evaluate_policy(inputs: &[AggregateInput], req: &AggregateRequest) -> AggregateVerdict {
    let passed = inputs
        .iter()
        .filter(|i| i.status == MetricStatus::Pass)
        .count() as u32;
    let failed = inputs.len() as u32 - passed;
    let total = inputs.len() as u32;
    // Runners without an effective weight count as weight 1.0.
    let weighted_total: f64 = inputs
        .iter()
        .map(|input| input.runner.effective_weight.unwrap_or(1.0))
        .sum();
    let weighted_pass: f64 = inputs
        .iter()
        .filter(|i| i.status == MetricStatus::Pass)
        .map(|input| input.runner.effective_weight.unwrap_or(1.0))
        .sum();
    let outlier_runners = inputs
        .iter()
        .filter(|input| input.runner.outlier_reason.is_some())
        .count() as u32;
    let mut reasons = Vec::new();
    let status = match req.policy {
        // Every runner must pass.
        AggregationPolicy::All => {
            if failed == 0 {
                MetricStatus::Pass
            } else {
                reasons.push(format!(
                    "{failed} runner(s) failed under all-must-pass policy"
                ));
                MetricStatus::Fail
            }
        }
        // Strict majority of runners must pass (ties fail).
        AggregationPolicy::Majority => {
            if passed > failed {
                MetricStatus::Pass
            } else {
                reasons.push(format!(
                    "majority policy failed: pass={} fail={}",
                    passed, failed
                ));
                MetricStatus::Fail
            }
        }
        // Weighted pass ratio must reach the quorum (default 0.5).
        AggregationPolicy::Weighted => {
            let required = req.quorum.unwrap_or(0.5).clamp(0.0, 1.0);
            let ratio = if weighted_total == 0.0 {
                0.0
            } else {
                weighted_pass / weighted_total
            };
            if ratio >= required {
                MetricStatus::Pass
            } else {
                reasons.push(format!(
                    "weighted policy failed: score={ratio:.3}, required={required:.3}"
                ));
                MetricStatus::Fail
            }
        }
        // Unweighted pass ratio must reach the quorum (default 0.5).
        AggregationPolicy::Quorum => {
            let required = req.quorum.unwrap_or(0.5).clamp(0.0, 1.0);
            let ratio = if total == 0 {
                0.0
            } else {
                passed as f64 / total as f64
            };
            if ratio >= required {
                MetricStatus::Pass
            } else {
                reasons.push(format!(
                    "quorum policy failed: score={ratio:.3}, required={required:.3}"
                ));
                MetricStatus::Fail
            }
        }
        // Fails when `n` or more of an expected `m` runners fail; missing
        // receipts (fewer than `m`) also fail. `m` defaults to the count of
        // receipts actually received.
        AggregationPolicy::FailIfNOfM => {
            let fail_if = req.fail_if.clone().unwrap_or(FailIfNOfM { n: 1, m: None });
            let m = fail_if.m.unwrap_or(total);
            if total < m {
                reasons.push(format!(
                    "insufficient receipts: expected {m}, received {total}"
                ));
                MetricStatus::Fail
            } else if failed >= fail_if.n {
                reasons.push(format!(
                    "fail-if-n-of-m policy triggered: failed={failed} threshold={}",
                    fail_if.n
                ));
                MetricStatus::Fail
            } else {
                MetricStatus::Pass
            }
        }
    };
    AggregateVerdict {
        status,
        passed,
        failed,
        total,
        // Weighted tallies and quorum are only reported for the policies
        // that actually use them.
        weighted_pass: matches!(req.policy, AggregationPolicy::Weighted).then_some(weighted_pass),
        weighted_total: matches!(req.policy, AggregationPolicy::Weighted).then_some(weighted_total),
        required: matches!(
            req.policy,
            AggregationPolicy::Weighted | AggregationPolicy::Quorum
        )
        .then_some(req.quorum.unwrap_or(0.5)),
        outlier_runners: (outlier_runners > 0).then_some(outlier_runners),
        reasons,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use perfgate_types::{BenchMeta, Sample, Stats, ToolInfo, U64Summary};
    use std::fs;
    use tempfile::tempdir;
    // Builds a minimal one-sample receipt for benchmark "bench" with the
    // given run id, host os/arch, and sample exit code.
    fn mk_receipt(id: &str, os: &str, arch: &str, exit_code: i32) -> RunReceipt {
        RunReceipt {
            schema: perfgate_types::RUN_SCHEMA_V1.to_string(),
            tool: ToolInfo {
                name: "perfgate".to_string(),
                version: "test".to_string(),
            },
            run: RunMeta {
                id: id.to_string(),
                started_at: "2026-01-01T00:00:00Z".to_string(),
                ended_at: "2026-01-01T00:00:01Z".to_string(),
                host: HostInfo {
                    os: os.to_string(),
                    arch: arch.to_string(),
                    cpu_count: Some(8),
                    memory_bytes: Some(16 * 1024 * 1024 * 1024),
                    hostname_hash: None,
                },
            },
            bench: BenchMeta {
                name: "bench".to_string(),
                cwd: None,
                command: vec!["echo".to_string(), "x".to_string()],
                repeat: 1,
                warmup: 0,
                work_units: None,
                timeout_ms: None,
            },
            samples: vec![Sample {
                wall_ms: 10,
                exit_code,
                warmup: false,
                timed_out: false,
                cpu_ms: None,
                page_faults: None,
                ctx_switches: None,
                max_rss_kb: None,
                io_read_bytes: None,
                io_write_bytes: None,
                network_packets: None,
                energy_uj: None,
                binary_bytes: None,
                stdout: None,
                stderr: None,
            }],
            stats: Stats {
                wall_ms: U64Summary::new(10, 10, 10),
                cpu_ms: None,
                page_faults: None,
                ctx_switches: None,
                max_rss_kb: None,
                io_read_bytes: None,
                io_write_bytes: None,
                network_packets: None,
                energy_uj: None,
                binary_bytes: None,
                throughput_per_s: None,
            },
        }
    }
    // Variant of `mk_receipt` whose samples carry the given wall_ms values,
    // with repeat and stats recomputed to match.
    fn mk_receipt_with_wall_samples(
        id: &str,
        os: &str,
        arch: &str,
        exit_code: i32,
        wall_samples: &[u64],
    ) -> RunReceipt {
        let mut receipt = mk_receipt(id, os, arch, exit_code);
        receipt.samples = wall_samples
            .iter()
            .map(|wall_ms| Sample {
                wall_ms: *wall_ms,
                exit_code,
                warmup: false,
                timed_out: false,
                cpu_ms: None,
                page_faults: None,
                ctx_switches: None,
                max_rss_kb: None,
                io_read_bytes: None,
                io_write_bytes: None,
                network_packets: None,
                energy_uj: None,
                binary_bytes: None,
                stdout: None,
                stderr: None,
            })
            .collect();
        receipt.bench.repeat = wall_samples.len() as u32;
        receipt.stats = compute_stats(&receipt.samples, receipt.bench.work_units).unwrap();
        receipt
    }
    // 2-of-3 pass under the Majority policy should yield an overall Pass.
    #[test]
    fn majority_policy_passes_when_most_inputs_pass() {
        let inputs = vec![
            AggregateInput {
                source: "a".to_string(),
                run_id: "1".to_string(),
                bench_name: "bench".to_string(),
                host: mk_receipt("1", "linux", "x86_64", 0).run.host,
                runner: AggregateRunnerMeta {
                    label: "ubuntu-x86_64".to_string(),
                    class: None,
                    lane: None,
                    weight: None,
                    sample_count: None,
                    wall_ms_variance: None,
                    effective_weight: None,
                    outlier_reason: None,
                },
                status: MetricStatus::Pass,
                reasons: vec![],
            },
            AggregateInput {
                source: "b".to_string(),
                run_id: "2".to_string(),
                bench_name: "bench".to_string(),
                host: mk_receipt("2", "linux", "x86_64", 1).run.host,
                runner: AggregateRunnerMeta {
                    label: "ubuntu-x86_64".to_string(),
                    class: None,
                    lane: None,
                    weight: None,
                    sample_count: None,
                    wall_ms_variance: None,
                    effective_weight: None,
                    outlier_reason: None,
                },
                status: MetricStatus::Fail,
                reasons: vec!["non-zero".to_string()],
            },
            AggregateInput {
                source: "c".to_string(),
                run_id: "3".to_string(),
                bench_name: "bench".to_string(),
                host: mk_receipt("3", "linux", "x86_64", 0).run.host,
                runner: AggregateRunnerMeta {
                    label: "ubuntu-x86_64".to_string(),
                    class: None,
                    lane: None,
                    weight: None,
                    sample_count: None,
                    wall_ms_variance: None,
                    effective_weight: None,
                    outlier_reason: None,
                },
                status: MetricStatus::Pass,
                reasons: vec![],
            },
        ];
        let verdict = evaluate_policy(
            &inputs,
            &AggregateRequest {
                files: vec![],
                policy: AggregationPolicy::Majority,
                quorum: None,
                fail_if: None,
                weights: BTreeMap::new(),
                weight_mode: AggregateWeightMode::Configured,
                variance_floor: None,
                runner_class: None,
                lane: None,
            },
        );
        assert_eq!(verdict.status, MetricStatus::Pass);
    }
    // A 0.8-weight pass against a 0.2-weight fail meets a 0.7 quorum under
    // the Weighted policy with configured weights.
    #[test]
    fn weighted_policy_uses_configured_weights() {
        let mut weights = BTreeMap::new();
        weights.insert("ubuntu-x86_64".to_string(), 0.8);
        weights.insert("macos-aarch64".to_string(), 0.2);
        let inputs = vec![
            AggregateInput {
                source: "a".to_string(),
                run_id: "1".to_string(),
                bench_name: "bench".to_string(),
                host: mk_receipt("1", "linux", "x86_64", 0).run.host,
                runner: AggregateRunnerMeta {
                    label: "ubuntu-x86_64".to_string(),
                    class: None,
                    lane: None,
                    weight: Some(0.8),
                    sample_count: None,
                    wall_ms_variance: None,
                    effective_weight: Some(0.8),
                    outlier_reason: None,
                },
                status: MetricStatus::Pass,
                reasons: vec![],
            },
            AggregateInput {
                source: "b".to_string(),
                run_id: "2".to_string(),
                bench_name: "bench".to_string(),
                host: mk_receipt("2", "macos", "aarch64", 1).run.host,
                runner: AggregateRunnerMeta {
                    label: "macos-aarch64".to_string(),
                    class: None,
                    lane: None,
                    weight: Some(0.2),
                    sample_count: None,
                    wall_ms_variance: None,
                    effective_weight: Some(0.2),
                    outlier_reason: None,
                },
                status: MetricStatus::Fail,
                reasons: vec!["non-zero".to_string()],
            },
        ];
        let verdict = evaluate_policy(
            &inputs,
            &AggregateRequest {
                files: vec![],
                policy: AggregationPolicy::Weighted,
                quorum: Some(0.7),
                fail_if: None,
                weights,
                weight_mode: AggregateWeightMode::Configured,
                variance_floor: None,
                runner_class: None,
                lane: None,
            },
        );
        assert_eq!(verdict.status, MetricStatus::Pass);
        assert_eq!(verdict.weighted_pass, Some(0.8));
    }
    // End-to-end: with equal configured weights a noisy failing runner sinks
    // the 0.75 quorum, but inverse-variance weighting downranks it enough to
    // pass, and it gets flagged as the single variance outlier.
    #[test]
    fn inverse_variance_weighting_downranks_noisy_failures_and_marks_outliers() {
        let dir = tempdir().unwrap();
        let stable_a_path = dir.path().join("stable-a.json");
        let stable_b_path = dir.path().join("stable-b.json");
        let noisy_path = dir.path().join("noisy.json");
        let stable_a =
            mk_receipt_with_wall_samples("1", "linux", "x86_64", 0, &[100, 100, 100, 100]);
        let stable_b =
            mk_receipt_with_wall_samples("2", "linux", "x86_64", 0, &[110, 110, 110, 110]);
        let noisy = mk_receipt_with_wall_samples("3", "linux", "x86_64", 1, &[80, 140, 60, 160]);
        fs::write(&stable_a_path, serde_json::to_string(&stable_a).unwrap()).unwrap();
        fs::write(&stable_b_path, serde_json::to_string(&stable_b).unwrap()).unwrap();
        fs::write(&noisy_path, serde_json::to_string(&noisy).unwrap()).unwrap();
        let configured = AggregateUseCase
            .execute(AggregateRequest {
                files: vec![
                    stable_a_path.clone(),
                    stable_b_path.clone(),
                    noisy_path.clone(),
                ],
                policy: AggregationPolicy::Weighted,
                quorum: Some(0.75),
                fail_if: None,
                weights: BTreeMap::new(),
                weight_mode: AggregateWeightMode::Configured,
                variance_floor: None,
                runner_class: None,
                lane: None,
            })
            .unwrap();
        assert_eq!(configured.aggregate.verdict.status, MetricStatus::Fail);
        let inverse = AggregateUseCase
            .execute(AggregateRequest {
                files: vec![stable_a_path, stable_b_path, noisy_path],
                policy: AggregationPolicy::Weighted,
                quorum: Some(0.75),
                fail_if: None,
                weights: BTreeMap::new(),
                weight_mode: AggregateWeightMode::InverseVariance,
                variance_floor: Some(1.0),
                runner_class: None,
                lane: None,
            })
            .unwrap();
        assert_eq!(inverse.aggregate.verdict.status, MetricStatus::Pass);
        assert_eq!(
            inverse.aggregate.weight_mode,
            AggregateWeightMode::InverseVariance
        );
        assert_eq!(inverse.aggregate.variance_floor, Some(1.0));
        assert_eq!(inverse.aggregate.verdict.outlier_runners, Some(1));
        let noisy_input = inverse
            .aggregate
            .inputs
            .iter()
            .find(|input| input.run_id == "3")
            .unwrap();
        let stable_input = inverse
            .aggregate
            .inputs
            .iter()
            .find(|input| input.run_id == "1")
            .unwrap();
        assert!(noisy_input.runner.outlier_reason.is_some());
        assert!(
            stable_input.runner.effective_weight.unwrap()
                > noisy_input.runner.effective_weight.unwrap()
        );
    }
    // Single-sample runners should trigger both the low-sample-count and
    // missing-variance warnings in inverse-variance mode.
    #[test]
    fn inverse_variance_warns_when_sample_counts_are_low() {
        let dir = tempdir().unwrap();
        let first_path = dir.path().join("first.json");
        let second_path = dir.path().join("second.json");
        let first = mk_receipt_with_wall_samples("1", "linux", "x86_64", 0, &[100]);
        let second = mk_receipt_with_wall_samples("2", "linux", "x86_64", 0, &[110]);
        fs::write(&first_path, serde_json::to_string(&first).unwrap()).unwrap();
        fs::write(&second_path, serde_json::to_string(&second).unwrap()).unwrap();
        let outcome = AggregateUseCase
            .execute(AggregateRequest {
                files: vec![first_path, second_path],
                policy: AggregationPolicy::Weighted,
                quorum: Some(0.5),
                fail_if: None,
                weights: BTreeMap::new(),
                weight_mode: AggregateWeightMode::InverseVariance,
                variance_floor: Some(1.0),
                runner_class: None,
                lane: None,
            })
            .unwrap();
        assert!(
            outcome
                .aggregate
                .warnings
                .iter()
                .any(|warning| warning.contains("fewer than 5 measured sample(s)"))
        );
        assert!(
            outcome
                .aggregate
                .warnings
                .iter()
                .any(|warning| warning.contains("using variance floor"))
        );
    }
    // A failing warmup sample must not affect the input's status or reasons.
    #[test]
    fn input_status_ignores_warmup_failures() {
        let mut receipt = mk_receipt("1", "linux", "x86_64", 0);
        receipt.samples = vec![
            Sample {
                wall_ms: 10,
                exit_code: 1,
                warmup: true,
                timed_out: false,
                cpu_ms: None,
                page_faults: None,
                ctx_switches: None,
                max_rss_kb: None,
                io_read_bytes: None,
                io_write_bytes: None,
                network_packets: None,
                energy_uj: None,
                binary_bytes: None,
                stdout: None,
                stderr: None,
            },
            Sample {
                wall_ms: 10,
                exit_code: 0,
                warmup: false,
                timed_out: false,
                cpu_ms: None,
                page_faults: None,
                ctx_switches: None,
                max_rss_kb: None,
                io_read_bytes: None,
                io_write_bytes: None,
                network_packets: None,
                energy_uj: None,
                binary_bytes: None,
                stdout: None,
                stderr: None,
            },
        ];
        assert_eq!(input_status(&receipt), MetricStatus::Pass);
        assert!(input_reasons(&receipt).is_empty());
    }
    // End-to-end happy path: merged samples, recomputed median, and
    // aggregate metadata (schema, policy, weight mode, inputs) all line up.
    #[test]
    fn execute_returns_combined_run_receipt_and_aggregate_metadata() {
        let dir = tempdir().unwrap();
        let first_path = dir.path().join("run1.json");
        let second_path = dir.path().join("run2.json");
        let mut first = mk_receipt("1", "linux", "x86_64", 0);
        first.samples[0].wall_ms = 100;
        first.stats.wall_ms = U64Summary::new(100, 100, 100);
        let mut second = mk_receipt("2", "linux", "x86_64", 0);
        second.samples[0].wall_ms = 110;
        second.stats.wall_ms = U64Summary::new(110, 110, 110);
        fs::write(&first_path, serde_json::to_string(&first).unwrap()).unwrap();
        fs::write(&second_path, serde_json::to_string(&second).unwrap()).unwrap();
        let outcome = AggregateUseCase
            .execute(AggregateRequest {
                files: vec![first_path, second_path],
                policy: AggregationPolicy::All,
                quorum: None,
                fail_if: None,
                weights: BTreeMap::new(),
                weight_mode: AggregateWeightMode::Configured,
                variance_floor: None,
                runner_class: None,
                lane: None,
            })
            .unwrap();
        assert_eq!(outcome.receipt.schema, perfgate_types::RUN_SCHEMA_V1);
        assert_eq!(outcome.receipt.samples.len(), 2);
        assert_eq!(outcome.receipt.stats.wall_ms.median, 105);
        assert_eq!(outcome.aggregate.schema, AGGREGATE_SCHEMA_V1);
        assert_eq!(outcome.aggregate.benchmark, "bench");
        assert_eq!(outcome.aggregate.policy, AggregationPolicy::All);
        assert_eq!(
            outcome.aggregate.weight_mode,
            AggregateWeightMode::Configured
        );
        assert_eq!(outcome.aggregate.variance_floor, None);
        assert_eq!(outcome.aggregate.inputs.len(), 2);
        assert_eq!(outcome.aggregate.verdict.status, MetricStatus::Pass);
    }
}