cobre-sddp 0.3.2

//! Automatic PAR(p) parameter estimation from historical inflow observations.
//!
//! This module bridges `cobre-io` (case loading) and `cobre-stochastic` (PAR fitting):
//! it checks the input file manifest, loads history when explicit model statistics are
//! absent, runs the fitting pipeline, and returns an updated [`System`].
//!
//! ## Input path matrix
//!
//! | `inflow_history.parquet` | `inflow_seasonal_stats.parquet` | `inflow_ar_coefficients.parquet` | Behaviour |
//! |---|---|---|---|
//! | absent | any | any | Return `system` unchanged. |
//! | present | present | present | Return `system` unchanged (explicit stats take priority). |
//! | present | present | absent | Partial estimation: load existing stats, estimate only AR coefficients. |
//! | present | absent | any | Full estimation; update `inflow_models` and optionally `correlation`. |
//!
//! `correlation.json` is handled independently: if present, the existing
//! `system.correlation()` is kept; if absent, the correlation is estimated from residuals.
//!
//! ## PACF order selection
//!
//! When `config.estimation.order_selection = "pacf"` (the default), the module
//! computes the periodic PACF via progressive periodic Yule-Walker matrix solves,
//! selects the order using a 95% significance threshold, then estimates
//! coefficients at the selected order using the periodic YW system.

use std::collections::{BTreeMap, HashMap, HashSet};
use std::path::Path;

use chrono::NaiveDate;
use cobre_core::{EntityId, System};
use cobre_io::{
    Config, FileManifest, LoadError, ValidationContext,
    config::OrderSelectionMethod,
    parse_inflow_history,
    scenarios::{InflowArCoefficientRow, InflowSeasonalStatsRow, assemble_inflow_models},
    validate_structure,
};
use cobre_stochastic::{
    StochasticError,
    par::contribution::{
        check_negative_contributions, compute_contributions, find_max_valid_order,
        has_negative_phi1,
    },
    par::fitting::{
        ArCoefficientEstimate, SeasonalStats, estimate_ar_coefficients_with_season_map,
        estimate_correlation_with_season_map, estimate_periodic_ar_coefficients,
        estimate_seasonal_stats_with_season_map, find_season_for_date, periodic_pacf,
        select_order_pacf,
    },
};

/// Errors that can occur during the automatic estimation pipeline.
#[derive(Debug, thiserror::Error)]
pub enum EstimationError {
    /// File read or parse failure during estimation.
    #[error("load error: {0}")]
    Load(#[from] LoadError),

    /// Estimation failed due to insufficient data.
    #[error("estimation failed: {0}")]
    Stochastic(#[from] StochasticError),
}

/// Reason for an AR order reduction.
///
/// Distinguishes the three mechanisms that can reduce a season's AR order
/// during the estimation pipeline.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReductionReason {
    /// Coefficient exceeds the magnitude-bound safety threshold.
    MagnitudeBound,
    /// First AR coefficient (`phi_1`) is negative, contradicting
    /// hydrological persistence.
    Phi1Negative,
    /// Contribution analysis detected negative entries at one or more lags.
    NegativeContribution,
}

impl ReductionReason {
    /// Convert to a stable string representation for diagnostic output.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        match self {
            Self::MagnitudeBound => "magnitude_bound",
            Self::Phi1Negative => "phi1_negative",
            Self::NegativeContribution => "negative_contribution",
        }
    }
}

/// A single contribution-based order reduction event.
///
/// Records that a season's AR order was reduced because the contribution
/// analysis detected negative entries, indicating potential model instability.
#[derive(Debug, Clone)]
pub struct ContributionReduction {
    /// Season where the reduction occurred.
    pub season_id: usize,
    /// Order before reduction (from AIC or previous iteration).
    pub original_order: usize,
    /// Order after reduction (the maximum valid order from contributions).
    pub reduced_order: usize,
    /// Contribution values at the original order that triggered the reduction.
    pub contributions: Vec<f64>,
    /// The mechanism that triggered this reduction.
    pub reason: ReductionReason,
}

/// Per-hydro AIC diagnostic data captured during AIC-based AR order selection.
///
/// Holds the selected order, fitted AR coefficients, and any contribution-based
/// order reductions for each season at the selected order.
#[derive(Debug, Clone)]
pub struct HydroEstimationEntry {
    /// The selected AR order for this hydro plant (maximum across all seasons).
    ///
    /// This is the maximum of the per-season selected orders, which determines
    /// the coefficient vector length in the output.
    pub selected_order: u32,
    /// Fitted AR lag coefficients, one inner vector per season sorted by `season_id` ascending.
    ///
    /// Each inner vector holds the coefficients at the selected order for
    /// that season. Seasons where estimation was skipped (zero std, insufficient
    /// observations) have an empty coefficient vector.
    pub coefficients: Vec<Vec<f64>>,
    /// Records of contribution-based order reductions applied during fitting.
    ///
    /// Each entry documents a season where the initial order (from PACF or fixed
    /// selection) was reduced due to negative contributions. Empty when no
    /// reductions were needed.
    pub contribution_reductions: Vec<ContributionReduction>,
}

/// Computation-side summary of the AR estimation pipeline.
///
/// Contains one [`HydroEstimationEntry`] per hydro plant that was fitted,
/// keyed by [`EntityId`] for canonical deterministic ordering.
#[must_use]
#[derive(Debug, Clone)]
pub struct EstimationReport {
    /// Per-hydro diagnostic entries, keyed by entity ID.
    pub entries: BTreeMap<EntityId, HydroEstimationEntry>,
    /// The order selection method used (e.g., `"AIC"`, `"PACF"`, `"fixed"`).
    pub method: String,
}

/// Result of validating an AR order via contribution analysis.
///
/// Captures whether the current order is stable (all contributions non-negative),
/// the maximum valid order if not, and the computed contribution values for
/// diagnostic reporting.
#[derive(Debug, Clone)]
pub struct ContributionValidationResult {
    /// Whether the current order passed contribution validation.
    pub valid: bool,
    /// Maximum valid order (same as `current_order` if valid, less otherwise).
    pub max_valid_order: usize,
    /// Computed contribution values for the current order.
    pub contributions: Vec<f64>,
}

/// Validate an AR order for a single (entity, season) pair via contribution analysis.
///
/// Computes the recursively-composed contributions for the given season at the
/// current order, then checks for negative entries. Returns a result indicating
/// whether the order is stable and, if not, the maximum valid order.
///
/// When `current_order == 0`, returns immediately with `valid: true` and no
/// contributions (an order-0 model has no autoregressive dependence to validate).
fn validate_order_contributions(
    season_id: usize,
    n_seasons: usize,
    current_order: usize,
    all_season_coefficients: &[Vec<f64>],
    std_by_season: &[f64],
) -> ContributionValidationResult {
    if current_order == 0 {
        return ContributionValidationResult {
            valid: true,
            max_valid_order: 0,
            contributions: Vec::new(),
        };
    }

    let coeff_refs: Vec<&[f64]> = all_season_coefficients.iter().map(Vec::as_slice).collect();
    let contributions = compute_contributions(
        season_id,
        n_seasons,
        current_order,
        &coeff_refs,
        std_by_season,
    );

    let valid = !check_negative_contributions(&contributions);
    let max_valid_order = if valid {
        current_order
    } else {
        find_max_valid_order(&contributions)
    };

    ContributionValidationResult {
        valid,
        max_valid_order,
        contributions,
    }
}

/// Compute `residual_std_ratio` from a normalised prediction error variance (`sigma2`).
///
/// The Levinson-Durbin recursion can produce negative `sigma2` values when the
/// estimated autocorrelation sequence does not form a positive-definite Toeplitz
/// matrix. In Rust, `f64::sqrt()` of a negative number returns `NaN`, and
/// `NaN.clamp(a, b)` propagates `NaN` (IEEE 754 semantics). A `NaN`
/// `residual_std_ratio` would then poison the noise scale and eventually cause
/// the `HiGHS` solver to reject row-bound patches.
///
/// This helper treats non-positive `sigma2` as evidence that the AR model at the
/// given order is numerically unstable and falls back to the white-noise baseline
/// (`residual_std_ratio = 1.0`).
fn residual_std_ratio_from_sigma2(sigma2: f64) -> f64 {
    if sigma2 <= 0.0 {
        1.0
    } else {
        sigma2.sqrt().clamp(f64::EPSILON, 1.0)
    }
}

/// Estimate PAR(p) model parameters from inflow history when explicit stats are absent.
///
/// Reads the file manifest for `case_dir` to determine the input path. When
/// `inflow_history.parquet` is present and `inflow_seasonal_stats.parquet` is absent,
/// the function runs the full estimation pipeline and returns a new [`System`] with
/// updated `inflow_models` and (optionally) `correlation`. In all other cases the
/// `system` is returned unchanged.
///
/// # Estimation pipeline
///
/// 1. Runs structural validation to get the [`FileManifest`].
/// 2. Checks the input path matrix (see module doc).
/// 3. Loads inflow history via [`parse_inflow_history`].
/// 4. Estimates seasonal stats via `estimate_seasonal_stats`.
/// 5. Estimates AR coefficients (with AIC or fixed-order selection).
/// 6. Estimates correlation when `correlation.json` is absent.
/// 7. Converts estimation results to `cobre-io` row types, calls
///    `assemble_inflow_models`, and returns
///    `system.with_scenario_models(inflow_models, correlation)`.
///
/// # Errors
///
/// - [`EstimationError::Load`] — file read or parse failure.
/// - [`EstimationError::Stochastic`] — insufficient observations for any
///   `(entity, season)` group.
pub fn estimate_from_history(
    system: System,
    case_dir: &Path,
    config: &Config,
) -> Result<(System, Option<EstimationReport>), EstimationError> {
    // ── Step 1: resolve file manifest ────────────────────────────────────────
    let mut ctx = ValidationContext::new();
    let manifest = validate_structure(case_dir, &mut ctx);

    // Abort early if structural validation found errors.
    if ctx.into_result().is_err() {
        return Ok((system, None));
    }

    // ── Step 2: input path matrix ────────────────────────────────────────────
    if !manifest.scenarios_inflow_history_parquet {
        // No history file — nothing to estimate.
        return Ok((system, None));
    }

    if manifest.scenarios_inflow_seasonal_stats_parquet
        && manifest.scenarios_inflow_ar_coefficients_parquet
    {
        // Explicit stats provided — skip estimation.
        return Ok((system, None));
    }

    // History present, stats absent — run estimation.
    let (system, report) = run_estimation(system, case_dir, config, &manifest)?;
    Ok((system, Some(report)))
}

/// Inner function that runs the full estimation pipeline once path conditions are met.
fn run_estimation(
    system: System,
    case_dir: &Path,
    config: &Config,
    manifest: &FileManifest,
) -> Result<(System, EstimationReport), EstimationError> {
    // ── Step 3: load inflow history ──────────────────────────────────────────
    let history_path = case_dir.join("scenarios/inflow_history.parquet");
    let history = parse_inflow_history(&history_path)?;

    // ── Convert InflowHistoryRow to (EntityId, NaiveDate, f64) tuples ────────
    let observations: Vec<(EntityId, NaiveDate, f64)> = history
        .iter()
        .map(|row| (row.hydro_id, row.date, row.value_m3s))
        .collect();

    // ── Collect hydro IDs from system (canonical sorted order) ───────────────
    let hydro_ids: Vec<EntityId> = system.hydros().iter().map(|h| h.id).collect();

    // ── Use stages already present in the system (avoids re-parsing stages.json) ──
    let stages = system.stages();

    // ── Extract season map for calendar-based date-to-season fallback ────────
    let season_map = system.policy_graph().season_map.as_ref();

    // ── Step 4: estimate seasonal stats ─────────────────────────────────────
    let seasonal_stats =
        estimate_seasonal_stats_with_season_map(&observations, stages, &hydro_ids, season_map)?;

    // ── Step 5: estimate AR coefficients ────────────────────────────────────
    let max_order = config.estimation.max_order as usize;
    let (ar_estimates, estimation_report) = estimate_ar_coefficients_with_selection(
        &observations,
        &seasonal_stats,
        stages,
        &hydro_ids,
        &ArEstimationConfig {
            max_order,
            max_coeff_magnitude: config.estimation.max_coefficient_magnitude,
            method: &config.estimation.order_selection,
            season_map,
        },
    )?;

    // ── Step 6: estimate or preserve correlation ─────────────────────────────
    let correlation = if manifest.scenarios_correlation_json {
        // Explicit correlation.json is present — keep whatever was loaded by load_case.
        system.correlation().clone()
    } else {
        estimate_correlation_with_season_map(
            &observations,
            &ar_estimates,
            &seasonal_stats,
            stages,
            &hydro_ids,
            season_map,
        )?
    };

    // ── Step 7: convert results to row types and assemble inflow models ───────
    let stats_rows = seasonal_stats_to_rows(&seasonal_stats, stages);
    let coeff_rows = ar_estimates_to_rows(&ar_estimates, stages);

    let inflow_models = assemble_inflow_models(stats_rows, coeff_rows)?;

    Ok((
        system.with_scenario_models(inflow_models, correlation),
        estimation_report,
    ))
}

/// Configuration parameters for AR coefficient estimation.
struct ArEstimationConfig<'a> {
    max_order: usize,
    max_coeff_magnitude: Option<f64>,
    method: &'a OrderSelectionMethod,
    season_map: Option<&'a cobre_core::temporal::SeasonMap>,
}

/// Estimate AR coefficients with the configured order selection method.
///
/// For `Fixed`, delegates directly to `estimate_ar_coefficients`.
/// For `Aic`, calls `estimate_ar_coefficients` at `max_order` to obtain full
/// coefficients, then calls `levinson_durbin` per `(entity, season)` pair to
/// get `sigma2_per_order` for AIC minimisation, and truncates the coefficient
/// vector to the AIC-selected order.
fn estimate_ar_coefficients_with_selection(
    observations: &[(EntityId, NaiveDate, f64)],
    seasonal_stats: &[SeasonalStats],
    stages: &[cobre_core::temporal::Stage],
    hydro_ids: &[EntityId],
    cfg: &ArEstimationConfig<'_>,
) -> Result<(Vec<ArCoefficientEstimate>, EstimationReport), StochasticError> {
    match cfg.method {
        OrderSelectionMethod::Fixed => {
            let mut estimates = estimate_ar_coefficients_with_season_map(
                observations,
                seasonal_stats,
                stages,
                hydro_ids,
                cfg.max_order,
                cfg.season_map,
            )?;

            // Build stats_map and n_seasons for contribution validation.
            let stage_index = stages
                .iter()
                .filter_map(|s| s.season_id.map(|sid| (s.id, sid)))
                .collect::<Vec<_>>();
            let stage_id_to_season: HashMap<i32, usize> = stage_index.iter().copied().collect();
            let n_seasons = stage_index
                .iter()
                .map(|&(_, sid)| sid + 1)
                .max()
                .unwrap_or(0);
            let stats_map: HashMap<(EntityId, usize), &SeasonalStats> = seasonal_stats
                .iter()
                .filter_map(|s| {
                    let season_id = stage_id_to_season.get(&s.stage_id).copied()?;
                    Some(((s.entity_id, season_id), s))
                })
                .collect();
            let sigma2_map: HashMap<(EntityId, usize), Vec<f64>> = HashMap::new();

            let reductions = apply_contribution_validation(
                &mut estimates,
                n_seasons,
                &stats_map,
                &sigma2_map,
                cfg.max_coeff_magnitude,
            );

            let report = build_estimation_report(&estimates, n_seasons, &reductions, "fixed");
            Ok((estimates, report))
        }
        OrderSelectionMethod::Pacf => estimate_ar_with_pacf(
            observations,
            seasonal_stats,
            stages,
            hydro_ids,
            cfg.max_order,
            cfg.season_map,
            cfg.max_coeff_magnitude,
        ),
    }
}

/// PACF-based AR order selection using periodic Yule-Walker method.
///
/// Selects the AR order using the periodic partial autocorrelation function
/// (PACF) significance test, then estimates coefficients via the periodic
/// Yule-Walker matrix solve. The PACF threshold uses a 95% confidence
/// interval (`z_alpha = 1.96`).
///
/// The periodic approach correctly accounts for the non-Toeplitz covariance
/// structure of periodic autoregressive processes.
#[allow(clippy::too_many_lines)]
fn estimate_ar_with_pacf(
    observations: &[(EntityId, NaiveDate, f64)],
    seasonal_stats: &[SeasonalStats],
    stages: &[cobre_core::temporal::Stage],
    hydro_ids: &[EntityId],
    max_order: usize,
    season_map: Option<&cobre_core::temporal::SeasonMap>,
    max_coeff_magnitude: Option<f64>,
) -> Result<(Vec<ArCoefficientEstimate>, EstimationReport), StochasticError> {
    if max_order == 0 {
        // Order-0: produce white-noise estimates for all (entity, season) pairs.
        let estimates = estimate_ar_coefficients_with_season_map(
            observations,
            seasonal_stats,
            stages,
            hydro_ids,
            0,
            season_map,
        )?;
        let report = EstimationReport {
            entries: BTreeMap::new(),
            method: "PACF".to_string(),
        };
        return Ok((estimates, report));
    }

    // Build stage and stats lookups.
    let mut stage_index = stages
        .iter()
        .filter_map(|s| s.season_id.map(|sid| (s.start_date, s.end_date, s.id, sid)))
        .collect::<Vec<_>>();
    stage_index.sort_unstable_by_key(|(start, _, _, _)| *start);

    let stage_id_to_season: HashMap<i32, usize> = stage_index
        .iter()
        .map(|&(_, _, stage_id, season_id)| (stage_id, season_id))
        .collect();

    let stats_map: HashMap<(EntityId, usize), &SeasonalStats> = seasonal_stats
        .iter()
        .filter_map(|s| {
            let season_id = stage_id_to_season.get(&s.stage_id).copied()?;
            Some(((s.entity_id, season_id), s))
        })
        .collect();

    let n_seasons: usize = {
        let mut max_season = 0usize;
        for &(_, _, _, season_id) in &stage_index {
            if season_id >= max_season {
                max_season = season_id + 1;
            }
        }
        max_season
    };

    // Group observations by (entity_id, season_id).
    let entity_set: HashSet<EntityId> = hydro_ids.iter().copied().collect();
    let mut group_obs: HashMap<(EntityId, usize), Vec<f64>> = HashMap::new();
    for &(entity_id, date, value) in observations {
        if !entity_set.contains(&entity_id) {
            continue;
        }
        let Some(season_id) = find_season_for_date(&stage_index, date)
            .or_else(|| season_map.and_then(|sm| sm.season_for_date(date)))
        else {
            continue;
        };
        group_obs
            .entry((entity_id, season_id))
            .or_default()
            .push(value);
    }

    // 95% confidence z-score for the PACF significance threshold.
    let z_alpha = 1.96_f64;

    // Build estimates using periodic PACF and YW coefficient estimation.
    let mut estimates: Vec<ArCoefficientEstimate> = Vec::new();

    for &hydro_id in hydro_ids {
        // Build observations_by_season and stats_by_season for this entity.
        let mut obs_by_season: Vec<Vec<f64>> = vec![Vec::new(); n_seasons];
        let mut stats_by_season: Vec<(f64, f64)> = vec![(0.0, 0.0); n_seasons];

        for season in 0..n_seasons {
            if let Some(obs) = group_obs.get(&(hydro_id, season)) {
                obs_by_season[season].clone_from(obs);
            }
            if let Some(stats) = stats_map.get(&(hydro_id, season)) {
                stats_by_season[season] = (stats.mean, stats.std);
            }
        }

        let obs_refs: Vec<&[f64]> = obs_by_season.iter().map(Vec::as_slice).collect();

        for season in 0..n_seasons {
            let stats_s = stats_by_season[season];

            // Skip seasons with zero std or insufficient data.
            if stats_s.1 == 0.0 || obs_by_season[season].len() < 2 {
                estimates.push(ArCoefficientEstimate {
                    hydro_id,
                    season_id: season,
                    coefficients: Vec::new(),
                    residual_std_ratio: 1.0,
                });
                continue;
            }

            let n_obs = obs_by_season[season].len();

            // Compute periodic PACF for order selection.
            let pacf_values =
                periodic_pacf(season, max_order, n_seasons, &obs_refs, &stats_by_season);

            // Select order via significance test.
            let pacf_result = select_order_pacf(&pacf_values, n_obs, z_alpha);
            let selected_order = pacf_result.selected_order;

            // Estimate AR coefficients at the selected order using periodic YW.
            let yw_result = estimate_periodic_ar_coefficients(
                season,
                selected_order,
                n_seasons,
                &obs_refs,
                &stats_by_season,
            );

            estimates.push(ArCoefficientEstimate {
                hydro_id,
                season_id: season,
                coefficients: yw_result.coefficients,
                residual_std_ratio: yw_result.residual_std_ratio,
            });
        }
    }

    // === Iterative PACF reduction with contribution validation ===
    let reductions = iterative_pacf_reduction(
        &mut estimates,
        n_seasons,
        hydro_ids,
        &group_obs,
        &stats_map,
        max_order,
        z_alpha,
        max_coeff_magnitude,
    );

    let report = build_estimation_report(&estimates, n_seasons, &reductions, "PACF");
    Ok((estimates, report))
}

/// Iteratively reduce AR orders via PACF re-selection and contribution validation.
///
/// For each entity, maintains per-season `max_order` ceilings. When contribution
/// analysis detects negative contributions for a season, reduces that season's
/// ceiling by 1 and re-runs the full PACF selection + YW estimation + `phi_1`
/// check + contribution validation cycle. Repeats until all seasons pass or
/// their ceilings reach 0.
///
/// This implements NEWAVE's `reducao_ordem` algorithm.
#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
fn iterative_pacf_reduction(
    estimates: &mut [ArCoefficientEstimate],
    n_seasons: usize,
    hydro_ids: &[EntityId],
    group_obs: &HashMap<(EntityId, usize), Vec<f64>>,
    stats_map: &HashMap<(EntityId, usize), &SeasonalStats>,
    initial_max_order: usize,
    z_alpha: f64,
    max_coeff_magnitude: Option<f64>,
) -> HashMap<EntityId, Vec<ContributionReduction>> {
    let mut all_reductions: HashMap<EntityId, Vec<ContributionReduction>> = HashMap::new();

    // Pre-pass: magnitude bound safety check (same as apply_contribution_validation).
    if let Some(threshold) = max_coeff_magnitude {
        for est in estimates.iter_mut() {
            let has_explosive = est.coefficients.iter().any(|c| c.abs() > threshold);
            if has_explosive {
                let original_order = est.coefficients.len();
                all_reductions
                    .entry(est.hydro_id)
                    .or_default()
                    .push(ContributionReduction {
                        season_id: est.season_id,
                        original_order,
                        reduced_order: 0,
                        contributions: Vec::new(),
                        reason: ReductionReason::MagnitudeBound,
                    });
                est.coefficients.clear();
                est.residual_std_ratio = 1.0;
            }
        }
    }

    // Pre-pass 2: phi_1 negativity rejection.
    for est in estimates.iter_mut() {
        if has_negative_phi1(&est.coefficients) {
            let original_order = est.coefficients.len();
            all_reductions
                .entry(est.hydro_id)
                .or_default()
                .push(ContributionReduction {
                    season_id: est.season_id,
                    original_order,
                    reduced_order: 0,
                    contributions: Vec::new(),
                    reason: ReductionReason::Phi1Negative,
                });
            est.coefficients.clear();
            est.residual_std_ratio = 1.0;
        }
    }

    // Group estimate indices by hydro_id for per-entity processing.
    let mut hydro_indices: BTreeMap<EntityId, Vec<usize>> = BTreeMap::new();
    for (idx, est) in estimates.iter().enumerate() {
        hydro_indices.entry(est.hydro_id).or_default().push(idx);
    }

    // Process each entity with iterative PACF re-selection.
    for &hydro_id in hydro_ids {
        let Some(indices) = hydro_indices.get(&hydro_id) else {
            continue;
        };

        // Build observations and stats for this entity.
        let mut obs_by_season: Vec<Vec<f64>> = vec![Vec::new(); n_seasons];
        let mut stats_by_season: Vec<(f64, f64)> = vec![(0.0, 0.0); n_seasons];

        for season in 0..n_seasons {
            if let Some(obs) = group_obs.get(&(hydro_id, season)) {
                obs_by_season[season].clone_from(obs);
            }
            if let Some(stats) = stats_map.get(&(hydro_id, season)) {
                stats_by_season[season] = (stats.mean, stats.std);
            }
        }

        // Build std_by_season for contribution analysis.
        let std_by_season: Vec<f64> = (0..n_seasons)
            .map(|sid| stats_map.get(&(hydro_id, sid)).map_or(0.0, |s| s.std))
            .collect();

        // Initialize per-season max_order ceilings.
        let mut max_orders: Vec<usize> = vec![initial_max_order; n_seasons];

        // Initialize current coefficients from estimates.
        let mut all_coeffs: Vec<Vec<f64>> = vec![Vec::new(); n_seasons];
        for &idx in indices {
            let est = &estimates[idx];
            if est.season_id < n_seasons {
                all_coeffs[est.season_id].clone_from(&est.coefficients);
            }
        }

        // Mark which seasons are already finalized (order 0 from pre-passes or
        // insufficient data).
        let mut frozen: Vec<bool> = vec![false; n_seasons];
        for &idx in indices {
            let sid = estimates[idx].season_id;
            if estimates[idx].coefficients.is_empty() {
                frozen[sid] = true;
            }
        }

        let obs_refs: Vec<&[f64]> = obs_by_season.iter().map(Vec::as_slice).collect();

        // Iterative contribution validation loop.
        loop {
            // Find seasons that fail contribution validation.
            let mut failing_seasons: Vec<usize> = Vec::new();

            for &idx in indices {
                let season_id = estimates[idx].season_id;
                if frozen[season_id] {
                    continue;
                }
                let current_order = estimates[idx].coefficients.len();
                if current_order == 0 {
                    continue;
                }

                let result = validate_order_contributions(
                    season_id,
                    n_seasons,
                    current_order,
                    &all_coeffs,
                    &std_by_season,
                );

                if !result.valid {
                    all_reductions
                        .entry(hydro_id)
                        .or_default()
                        .push(ContributionReduction {
                            season_id,
                            original_order: current_order,
                            reduced_order: result.max_valid_order,
                            contributions: result.contributions,
                            reason: ReductionReason::NegativeContribution,
                        });
                    failing_seasons.push(season_id);
                }
            }

            if failing_seasons.is_empty() {
                break; // All seasons pass.
            }

            // Reduce max_order for failing seasons and re-run PACF + YW.
            let mut any_reselected = false;
            for &season_id in &failing_seasons {
                if max_orders[season_id] == 0 {
                    continue;
                }
                max_orders[season_id] -= 1;

                if max_orders[season_id] == 0 {
                    // Set to order 0 directly.
                    for &idx in indices {
                        if estimates[idx].season_id == season_id {
                            estimates[idx].coefficients.clear();
                            estimates[idx].residual_std_ratio = 1.0;
                            all_coeffs[season_id].clear();
                            frozen[season_id] = true;
                        }
                    }
                    continue;
                }

                // Re-run PACF with reduced max_order.
                let stats_s = stats_by_season[season_id];
                if stats_s.1 == 0.0 || obs_by_season[season_id].len() < 2 {
                    frozen[season_id] = true;
                    continue;
                }

                let n_obs = obs_by_season[season_id].len();
                let pacf_values = periodic_pacf(
                    season_id,
                    max_orders[season_id],
                    n_seasons,
                    &obs_refs,
                    &stats_by_season,
                );
                let pacf_result = select_order_pacf(&pacf_values, n_obs, z_alpha);
                let selected_order = pacf_result.selected_order;

                // Re-estimate coefficients at the new selected order.
                let yw_result = estimate_periodic_ar_coefficients(
                    season_id,
                    selected_order,
                    n_seasons,
                    &obs_refs,
                    &stats_by_season,
                );

                // Update the estimate.
                for &idx in indices {
                    if estimates[idx].season_id == season_id {
                        estimates[idx]
                            .coefficients
                            .clone_from(&yw_result.coefficients);
                        estimates[idx].residual_std_ratio = yw_result.residual_std_ratio;
                        all_coeffs[season_id].clone_from(&yw_result.coefficients);
                    }
                }

                // Check phi_1 on re-estimated coefficients.
                if has_negative_phi1(&all_coeffs[season_id]) {
                    let original_order = all_coeffs[season_id].len();
                    all_reductions
                        .entry(hydro_id)
                        .or_default()
                        .push(ContributionReduction {
                            season_id,
                            original_order,
                            reduced_order: 0,
                            contributions: Vec::new(),
                            reason: ReductionReason::Phi1Negative,
                        });
                    for &idx in indices {
                        if estimates[idx].season_id == season_id {
                            estimates[idx].coefficients.clear();
                            estimates[idx].residual_std_ratio = 1.0;
                            all_coeffs[season_id].clear();
                            frozen[season_id] = true;
                        }
                    }
                } else {
                    any_reselected = true;
                }
            }

            if !any_reselected {
                break; // All failing seasons are at order 0 or frozen.
            }
        }
    }

    all_reductions
}

/// Apply coefficient magnitude bound and contribution-based order validation
/// to all AR estimates.
///
/// When `max_coeff_magnitude` is `Some(threshold)`, any (entity, season)
/// with `|coefficient| > threshold` is immediately reduced to order 0
/// before the contribution analysis runs. This acts as a fast-path safety
/// net for the most extreme explosive models.
///
/// Then for each entity, groups all season coefficients and standard
/// deviations, iterates per season: if negative contributions are found,
/// the order is reduced to the maximum valid order and the coefficient
/// vector truncated. The loop repeats until all contributions are
/// non-negative or the order reaches zero.
///
/// Returns a map of `EntityId` -> list of `ContributionReduction` events.
fn apply_contribution_validation(
    estimates: &mut [ArCoefficientEstimate],
    n_seasons: usize,
    stats_map: &HashMap<(EntityId, usize), &SeasonalStats>,
    sigma2_map: &HashMap<(EntityId, usize), Vec<f64>>,
    max_coeff_magnitude: Option<f64>,
) -> HashMap<EntityId, Vec<ContributionReduction>> {
    let mut all_reductions: HashMap<EntityId, Vec<ContributionReduction>> = HashMap::new();

    // Pre-pass: magnitude bound safety check.
    if let Some(threshold) = max_coeff_magnitude {
        for est in estimates.iter_mut() {
            let has_explosive = est.coefficients.iter().any(|c| c.abs() > threshold);
            if has_explosive {
                let original_order = est.coefficients.len();
                all_reductions
                    .entry(est.hydro_id)
                    .or_default()
                    .push(ContributionReduction {
                        season_id: est.season_id,
                        original_order,
                        reduced_order: 0,
                        contributions: Vec::new(), // magnitude-based, no contributions computed
                        reason: ReductionReason::MagnitudeBound,
                    });
                est.coefficients.clear();
                est.residual_std_ratio = 1.0;
            }
        }
    }

    // Pre-pass 2: phi_1 negativity rejection.
    // A negative first AR coefficient contradicts hydrological persistence.
    // This check is cheaper than contribution analysis and catches most
    // unstable models early.
    for est in estimates.iter_mut() {
        if has_negative_phi1(&est.coefficients) {
            let original_order = est.coefficients.len();
            all_reductions
                .entry(est.hydro_id)
                .or_default()
                .push(ContributionReduction {
                    season_id: est.season_id,
                    original_order,
                    reduced_order: 0,
                    contributions: Vec::new(),
                    reason: ReductionReason::Phi1Negative,
                });
            est.coefficients.clear();
            est.residual_std_ratio = 1.0;
        }
    }

    // Group estimate indices by hydro_id for per-entity processing.
    let mut hydro_indices: BTreeMap<EntityId, Vec<usize>> = BTreeMap::new();
    for (idx, est) in estimates.iter().enumerate() {
        hydro_indices.entry(est.hydro_id).or_default().push(idx);
    }

    for (&hydro_id, indices) in &hydro_indices {
        // Build std_by_season from seasonal_stats.
        let std_by_season: Vec<f64> = (0..n_seasons)
            .map(|sid| stats_map.get(&(hydro_id, sid)).map_or(0.0, |s| s.std))
            .collect();

        // Build all_season_coefficients from current estimates.
        let mut all_coeffs: Vec<Vec<f64>> = vec![Vec::new(); n_seasons];
        for &idx in indices {
            let est = &estimates[idx];
            if est.season_id < n_seasons {
                all_coeffs[est.season_id].clone_from(&est.coefficients);
            }
        }

        // Validate each season for this entity.
        for &idx in indices {
            let season_id = estimates[idx].season_id;
            let mut current_order = estimates[idx].coefficients.len();

            loop {
                let result = validate_order_contributions(
                    season_id,
                    n_seasons,
                    current_order,
                    &all_coeffs,
                    &std_by_season,
                );

                if result.valid || current_order == 0 {
                    break;
                }

                let original_order = current_order;
                let reduced_order = result.max_valid_order;

                all_reductions
                    .entry(hydro_id)
                    .or_default()
                    .push(ContributionReduction {
                        season_id,
                        original_order,
                        reduced_order,
                        contributions: result.contributions,
                        reason: ReductionReason::NegativeContribution,
                    });

                // Truncate coefficients to the reduced order.
                estimates[idx].coefficients.truncate(reduced_order);

                // Recompute residual_std_ratio from stored sigma2 if available.
                if reduced_order == 0 {
                    estimates[idx].residual_std_ratio = 1.0;
                } else if let Some(sigma2_vec) = sigma2_map.get(&(hydro_id, season_id)) {
                    if reduced_order <= sigma2_vec.len() {
                        let sigma2 = sigma2_vec[reduced_order - 1];
                        estimates[idx].residual_std_ratio = residual_std_ratio_from_sigma2(sigma2);
                    }
                }

                // Update the shared coefficient array.
                all_coeffs[season_id].clone_from(&estimates[idx].coefficients);
                current_order = reduced_order;
            }
        }
    }

    all_reductions
}

/// Build an [`EstimationReport`] from AR estimates and contribution validation results.
///
/// This function is infallible: it only reorganises already-computed data.
/// For each hydro plant the selected order is the **maximum** across all
/// seasons. These choices align with how the I/O layer (`FittingReport`)
/// expects a single order per hydro.
pub(crate) fn build_estimation_report(
    estimates: &[ArCoefficientEstimate],
    n_seasons: usize,
    contribution_reductions: &HashMap<EntityId, Vec<ContributionReduction>>,
    method: &str,
) -> EstimationReport {
    // Group coefficient vectors by hydro_id (estimates are already sorted by
    // (hydro_id, season_id) from estimate_ar_coefficients).
    let mut hydro_coeffs: BTreeMap<EntityId, Vec<(usize, Vec<f64>)>> = BTreeMap::new();
    for est in estimates {
        hydro_coeffs
            .entry(est.hydro_id)
            .or_default()
            .push((est.season_id, est.coefficients.clone()));
    }

    let mut entries: BTreeMap<EntityId, HydroEstimationEntry> = BTreeMap::new();

    for (hydro_id, mut season_coeffs) in hydro_coeffs {
        // Sort by season_id ascending (should already be sorted, but ensure it).
        season_coeffs.sort_by_key(|(season_id, _)| *season_id);

        // Compute max selected_order as the maximum actual coefficient length
        // across all seasons for this hydro (after all truncations).
        let selected_order = season_coeffs
            .iter()
            .map(|(_, coeffs)| coeffs.len())
            .max()
            .unwrap_or(0);

        // Build per-season coefficient vectors, filling missing seasons with empty vecs.
        // The season_coeffs may not cover all n_seasons if some were skipped.
        let season_map: HashMap<usize, Vec<f64>> = season_coeffs.into_iter().collect();
        let coefficients: Vec<Vec<f64>> = (0..n_seasons)
            .map(|sid| season_map.get(&sid).cloned().unwrap_or_default())
            .collect();

        let reductions = contribution_reductions
            .get(&hydro_id)
            .cloned()
            .unwrap_or_default();

        #[allow(clippy::cast_possible_truncation)]
        entries.insert(
            hydro_id,
            HydroEstimationEntry {
                selected_order: selected_order as u32,
                coefficients,
                contribution_reductions: reductions,
            },
        );
    }

    EstimationReport {
        entries,
        method: method.to_string(),
    }
}

/// Convert [`SeasonalStats`] to [`InflowSeasonalStatsRow`], expanding
/// per-season estimates to every stage that shares the same `season_id`.
///
/// The `stage_id` in `SeasonalStats` stores the ID of the first stage with
/// the matching season.  This function looks up that stage's `season_id` and
/// emits one row for every stage with the same season, so that
/// [`cobre_stochastic::PrecomputedPar`] finds a model at every stage index.
///
/// **Pre-study stages**: When `stages` contains pre-study stages (negative
/// `id`, valid `season_id`), this function includes them in the expansion.
/// Pre-study rows are emitted with their negative `stage_id`, which allows
/// `PrecomputedPar::build` to find direct hits for lag stages without
/// needing the season-based fallback path.
fn seasonal_stats_to_rows(
    stats: &[SeasonalStats],
    stages: &[cobre_core::temporal::Stage],
) -> Vec<InflowSeasonalStatsRow> {
    let stage_to_season: HashMap<i32, usize> = stages
        .iter()
        .filter_map(|s| s.season_id.map(|sid| (s.id, sid)))
        .collect();

    let mut season_to_stages: HashMap<usize, Vec<i32>> = HashMap::new();
    for stage in stages {
        if let Some(sid) = stage.season_id {
            season_to_stages.entry(sid).or_default().push(stage.id);
        }
    }

    let mut rows = Vec::with_capacity(stats.len() * 10);
    for s in stats {
        if let Some(&season_id) = stage_to_season.get(&s.stage_id) {
            if let Some(stage_ids) = season_to_stages.get(&season_id) {
                for &stage_id in stage_ids {
                    rows.push(InflowSeasonalStatsRow {
                        hydro_id: s.entity_id,
                        stage_id,
                        mean_m3s: s.mean,
                        std_m3s: s.std,
                    });
                }
                continue;
            }
        }
        // Fallback: emit just the original stage_id.
        rows.push(InflowSeasonalStatsRow {
            hydro_id: s.entity_id,
            stage_id: s.stage_id,
            mean_m3s: s.mean,
            std_m3s: s.std,
        });
    }

    rows.sort_by_key(|r| (r.hydro_id.0, r.stage_id));
    rows
}

/// Convert [`ArCoefficientEstimate`] to [`InflowArCoefficientRow`], expanding
/// per-season AR coefficients to every stage that shares the same `season_id`.
///
/// The `season_id` in `ArCoefficientEstimate` is mapped to ALL stage IDs whose
/// `season_id` matches, so the resulting coefficient rows cover the full study
/// horizon (not just the first occurrence of each season).
///
/// **Pre-study stages**: When `stages` contains pre-study stages (negative
/// `id`, valid `season_id`), this function includes them in the expansion.
/// Pre-study coefficient rows are emitted with their negative `stage_id`,
/// providing direct model entries for `PrecomputedPar::build` lag lookups.
fn ar_estimates_to_rows(
    ar_estimates: &[ArCoefficientEstimate],
    stages: &[cobre_core::temporal::Stage],
) -> Vec<InflowArCoefficientRow> {
    let mut season_to_stages: HashMap<usize, Vec<i32>> = HashMap::new();
    for stage in stages {
        if let Some(sid) = stage.season_id {
            season_to_stages.entry(sid).or_default().push(stage.id);
        }
    }

    let mut rows: Vec<InflowArCoefficientRow> = Vec::new();

    for est in ar_estimates {
        let Some(stage_ids) = season_to_stages.get(&est.season_id) else {
            continue;
        };

        for &stage_id in stage_ids {
            for (lag_idx, &coeff) in est.coefficients.iter().enumerate() {
                #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
                let lag = (lag_idx + 1) as i32;
                rows.push(InflowArCoefficientRow {
                    hydro_id: est.hydro_id,
                    stage_id,
                    lag,
                    coefficient: coeff,
                    residual_std_ratio: est.residual_std_ratio,
                });
            }
        }
    }

    // Sort by (hydro_id, stage_id, lag) ascending — matches parser convention.
    rows.sort_by_key(|r| (r.hydro_id.0, r.stage_id, r.lag));

    rows
}

// ── Tests ─────────────────────────────────────────────────────────────────────

#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::too_many_lines,
    clippy::float_cmp,
    clippy::doc_markdown,
    clippy::cast_possible_truncation,
    clippy::cast_possible_wrap,
    clippy::useless_vec
)]
mod tests {
    use super::*;
    use cobre_core::scenario::{CorrelationModel, InflowModel};
    use cobre_core::{EntityId, SystemBuilder};

    // ── Helper to build a minimal System ─────────────────────────────────────

    fn minimal_system_with_inflow_models(models: Vec<InflowModel>) -> System {
        SystemBuilder::new()
            .inflow_models(models)
            .build()
            .expect("valid system")
    }

    // ── with_scenario_models tests ────────────────────────────────────────────

    /// AC-036-1: `with_scenario_models` replaces `inflow_models` and `correlation`
    /// while preserving all other fields.
    #[test]
    fn test_with_scenario_models_replaces_fields() {
        use cobre_core::{
            Bus, DeficitSegment,
            scenario::{CorrelationModel, InflowModel},
        };

        let bus = Bus {
            id: EntityId(1),
            name: "B1".to_string(),
            deficit_segments: vec![DeficitSegment {
                depth_mw: Some(f64::INFINITY),
                cost_per_mwh: 1000.0,
            }],
            excess_cost: 0.0,
        };

        // Build system with 2 inflow models.
        let old_model = InflowModel {
            hydro_id: EntityId(1),
            stage_id: 0,
            mean_m3s: 10.0,
            std_m3s: 1.0,
            ar_coefficients: vec![],
            residual_std_ratio: 1.0,
        };
        let system = SystemBuilder::new()
            .buses(vec![bus])
            .inflow_models(vec![old_model.clone(), {
                let mut m = old_model.clone();
                m.stage_id = 1;
                m
            }])
            .build()
            .expect("valid system");

        assert_eq!(system.inflow_models().len(), 2);
        assert_eq!(system.n_buses(), 1);

        // Replace with 4 models.
        let new_models: Vec<InflowModel> = (0..4)
            .map(|i| InflowModel {
                hydro_id: EntityId(1),
                stage_id: i,
                mean_m3s: 50.0,
                std_m3s: 5.0,
                ar_coefficients: vec![0.4],
                residual_std_ratio: 0.9,
            })
            .collect();
        let new_corr = CorrelationModel::default();

        let updated = system.with_scenario_models(new_models.clone(), new_corr.clone());

        // inflow_models and correlation updated.
        assert_eq!(updated.inflow_models().len(), 4, "expected 4 inflow models");
        assert_eq!(
            *updated.correlation(),
            new_corr,
            "correlation should equal new_corr"
        );

        // hydros, buses, stages unchanged.
        assert_eq!(updated.n_buses(), 1, "buses must be preserved");
        assert!(
            updated.hydros().is_empty(),
            "hydros must be preserved (empty)"
        );
        assert!(
            updated.stages().is_empty(),
            "stages must be preserved (empty)"
        );
    }

    /// `with_scenario_models` with an empty vec clears `inflow_models`.
    #[test]
    fn test_with_scenario_models_clears_when_empty() {
        let model = InflowModel {
            hydro_id: EntityId(1),
            stage_id: 0,
            mean_m3s: 100.0,
            std_m3s: 10.0,
            ar_coefficients: vec![],
            residual_std_ratio: 1.0,
        };
        let system = minimal_system_with_inflow_models(vec![model]);
        assert_eq!(system.inflow_models().len(), 1);

        let updated = system.with_scenario_models(vec![], CorrelationModel::default());
        assert!(updated.inflow_models().is_empty());
    }

    // ── estimate_from_history path-matrix tests ────────────────────────────────

    /// AC-036-3: When both stats files exist, `estimate_from_history` returns
    /// the system unchanged.
    #[test]
    fn test_estimate_explicit_stats_returns_unchanged() {
        use tempfile::TempDir;

        let dir = TempDir::new().unwrap();
        let case_dir = dir.path();

        // Create the minimal required files so validate_structure won't fail
        // (it only checks existence).
        create_required_files(case_dir);

        // Create both stats files.
        let scenarios = case_dir.join("scenarios");
        std::fs::create_dir_all(&scenarios).unwrap();
        std::fs::write(scenarios.join("inflow_history.parquet"), b"").unwrap();
        std::fs::write(scenarios.join("inflow_seasonal_stats.parquet"), b"").unwrap();
        std::fs::write(scenarios.join("inflow_ar_coefficients.parquet"), b"").unwrap();

        let model = InflowModel {
            hydro_id: EntityId(1),
            stage_id: 0,
            mean_m3s: 100.0,
            std_m3s: 10.0,
            ar_coefficients: vec![0.5],
            residual_std_ratio: 0.87,
        };
        let system = minimal_system_with_inflow_models(vec![model]);
        let original_len = system.inflow_models().len();

        let config = default_config();
        let (result, report) = estimate_from_history(system, case_dir, &config).unwrap();

        assert_eq!(
            result.inflow_models().len(),
            original_len,
            "explicit stats: system must be unchanged"
        );
        assert!(
            report.is_none(),
            "explicit stats path must return None report"
        );
    }

    /// AC-036-4: When no history file exists, `estimate_from_history` returns
    /// the system unchanged.
    #[test]
    fn test_estimate_no_history_returns_unchanged() {
        use tempfile::TempDir;

        let dir = TempDir::new().unwrap();
        let case_dir = dir.path();

        // No scenarios/ directory at all.
        create_required_files(case_dir);

        let model = InflowModel {
            hydro_id: EntityId(1),
            stage_id: 0,
            mean_m3s: 100.0,
            std_m3s: 10.0,
            ar_coefficients: vec![],
            residual_std_ratio: 1.0,
        };
        let system = minimal_system_with_inflow_models(vec![model]);
        let original_len = system.inflow_models().len();

        let config = default_config();
        let (result, report) = estimate_from_history(system, case_dir, &config).unwrap();

        assert_eq!(
            result.inflow_models().len(),
            original_len,
            "no history: system must be unchanged"
        );
        assert!(report.is_none(), "no history path must return None report");
    }

    // ── Helpers ───────────────────────────────────────────────────────────────

    fn default_config() -> Config {
        use cobre_io::config::{EstimationConfig, OrderSelectionMethod};
        let mut cfg: Config = serde_json::from_str(MINIMAL_CONFIG_JSON).unwrap();
        cfg.estimation = EstimationConfig {
            max_order: 2,
            order_selection: OrderSelectionMethod::Fixed,
            min_observations_per_season: 2,
            max_coefficient_magnitude: None,
        };
        cfg
    }

    const MINIMAL_CONFIG_JSON: &str = r#"{
        "training": { "seed": 42 },
        "simulation": { "enabled": false, "num_scenarios": 0, "io_channel_capacity": 16 },
        "modeling": {},
        "policy": {},
        "exports": {},
        "output": {}
    }"#;

    fn create_required_files(case_dir: &std::path::Path) {
        // validate_structure only checks existence; content doesn't matter here.
        let _ = std::fs::create_dir_all(case_dir.join("system"));
        let _ = std::fs::create_dir_all(case_dir.join("scenarios"));
        let write = |name: &str| {
            let _ = std::fs::write(case_dir.join(name), b"{}");
        };
        write("config.json");
        write("penalties.json");
        write("stages.json");
        write("initial_conditions.json");
        write("system/buses.json");
        write("system/lines.json");
        write("system/hydros.json");
        write("system/thermals.json");
    }

    // ── EstimationReport unit tests ───────────────────────────────────────────

    /// Construct mock `ArCoefficientEstimate` entries for 2 hydros with 3
    /// seasons each, call `build_estimation_report`, and verify that the
    /// report contains exactly 2 entries with the expected structure.
    #[test]
    fn test_estimation_report_structure() {
        let h1 = EntityId(1);
        let h2 = EntityId(2);
        let n_seasons = 3_usize;

        // Build mock estimates: 2 hydros x 3 seasons, max order 2.
        let mut estimates = Vec::new();
        for &hydro_id in &[h1, h2] {
            for season_id in 0..n_seasons {
                estimates.push(ArCoefficientEstimate {
                    hydro_id,
                    season_id,
                    coefficients: vec![0.5, 0.3],
                    residual_std_ratio: 0.9,
                });
            }
        }

        let contribution_reductions: HashMap<EntityId, Vec<ContributionReduction>> = HashMap::new();
        let report =
            build_estimation_report(&estimates, n_seasons, &contribution_reductions, "PACF");

        assert_eq!(report.entries.len(), 2, "expected 2 hydro entries");

        for &hydro_id in &[h1, h2] {
            let entry = report.entries.get(&hydro_id).expect("entry must exist");
            assert_eq!(entry.selected_order, 2, "selected_order must be 2");
            assert_eq!(
                entry.coefficients.len(),
                n_seasons,
                "one coefficient vec per season"
            );
        }
    }

    /// When `OrderSelectionMethod::Fixed` is used, the returned `EstimationReport`
    /// must have an empty entries map.
    #[test]
    fn test_estimation_report_empty_for_fixed() {
        use cobre_core::temporal::Stage;
        use cobre_io::config::OrderSelectionMethod;

        let method = OrderSelectionMethod::Fixed;
        let observations: Vec<(EntityId, chrono::NaiveDate, f64)> = vec![];
        let seasonal_stats: Vec<SeasonalStats> = vec![];
        let stages: Vec<Stage> = vec![];
        let hydro_ids: Vec<EntityId> = vec![];
        let max_order = 2;

        let (_, report) = estimate_ar_coefficients_with_selection(
            &observations,
            &seasonal_stats,
            &stages,
            &hydro_ids,
            &ArEstimationConfig {
                max_order,
                max_coeff_magnitude: None,
                method: &method,
                season_map: None,
            },
        )
        .unwrap();

        assert!(
            report.entries.is_empty(),
            "Fixed method must produce empty EstimationReport"
        );
    }

    // ── Contribution validation tests ─────────────────────────────────────

    /// E2-003: Order-0 fallback when the first contribution is negative.
    ///
    /// A single-season AR(1) model with phi = -1.5 produces a negative
    /// direct contribution. `validate_order_contributions` should report
    /// max_valid_order = 0.
    #[test]
    fn test_contribution_order_zero_fallback() {
        let result = validate_order_contributions(
            0,             // season_id
            1,             // n_seasons
            1,             // current_order
            &[vec![-1.5]], // all_season_coefficients
            &[10.0],       // std_by_season
        );
        assert!(!result.valid);
        assert_eq!(result.max_valid_order, 0);
    }

    /// E2-003: Order-0 input returns valid immediately.
    #[test]
    fn test_contribution_order_zero_input_passes() {
        let result = validate_order_contributions(0, 1, 0, &[Vec::new()], &[10.0]);
        assert!(result.valid);
        assert_eq!(result.max_valid_order, 0);
        assert!(result.contributions.is_empty());
    }

    /// E2-003: Stable AR(2) model with all-positive contributions passes.
    #[test]
    fn test_contribution_stable_model_passes() {
        let result = validate_order_contributions(
            0,                 // season_id
            1,                 // n_seasons
            2,                 // current_order
            &[vec![0.4, 0.2]], // all_season_coefficients
            &[10.0],           // std_by_season
        );
        assert!(result.valid);
        assert_eq!(result.max_valid_order, 2);
        assert_eq!(result.contributions.len(), 2);
    }

    /// E2-003: apply_contribution_validation reduces an explosive model.
    ///
    /// Constructs AR(2) with coefficients [0.3, -0.8] for a single entity
    /// and single season. The contribution of lag 2 is negative (-0.71),
    /// so the order should be reduced to 1.
    #[test]
    fn test_apply_contribution_validation_reduces_explosive() {
        let hydro_id = EntityId(1);
        let n_seasons = 1;

        let mut estimates = vec![ArCoefficientEstimate {
            hydro_id,
            season_id: 0,
            coefficients: vec![0.3, -0.8],
            residual_std_ratio: 0.9,
        }];

        let stats = vec![SeasonalStats {
            entity_id: hydro_id,
            stage_id: 0,
            mean: 100.0,
            std: 10.0,
        }];
        let stats_map: HashMap<(EntityId, usize), &SeasonalStats> =
            stats.iter().map(|s| ((s.entity_id, 0_usize), s)).collect();
        // sigma2_per_order: [sigma2_order1, sigma2_order2]
        // At order 1, residual is sqrt(0.81) ~= 0.9
        let mut sigma2_map: HashMap<(EntityId, usize), Vec<f64>> = HashMap::new();
        sigma2_map.insert((hydro_id, 0), vec![0.81, 0.75]);

        let reductions = apply_contribution_validation(
            &mut estimates,
            n_seasons,
            &stats_map,
            &sigma2_map,
            None, // max_coeff_magnitude
        );

        // After validation, the order should be reduced: [0.3, -0.8] -> [0.3]
        assert_eq!(
            estimates[0].coefficients.len(),
            1,
            "explosive AR(2) should be reduced to AR(1)"
        );
        assert!((estimates[0].coefficients[0] - 0.3).abs() < 1e-10);

        // Residual std ratio should be recomputed from sigma2_per_order[0]
        assert!(
            (estimates[0].residual_std_ratio - 0.81_f64.sqrt()).abs() < 1e-10,
            "residual_std_ratio should be recomputed from sigma2_per_order[0]"
        );

        // Should have recorded the reduction event.
        let entity_reductions = reductions.get(&hydro_id).expect("should have reductions");
        assert_eq!(entity_reductions.len(), 1);
        assert_eq!(entity_reductions[0].original_order, 2);
        assert_eq!(entity_reductions[0].reduced_order, 1);
        assert_eq!(entity_reductions[0].season_id, 0);
    }

    /// E2-003: PIMENTAL-like scenario -- large coefficient at lag 2 in one
    /// season while other seasons are benign.
    #[test]
    fn test_pimental_like_multi_season_reduction() {
        let hydro_id = EntityId(156);
        let n_seasons = 12;

        // Build estimates: most seasons have small AR(1), August has explosive AR(2).
        let mut estimates: Vec<ArCoefficientEstimate> = (0..n_seasons)
            .map(|s| ArCoefficientEstimate {
                hydro_id,
                season_id: s,
                coefficients: if s == 7 {
                    // August: explosive AR(2) with huge lag-2 coefficient
                    vec![0.5, 48.9]
                } else {
                    vec![0.1] // benign AR(1)
                },
                residual_std_ratio: 0.95,
            })
            .collect();

        // Std devs: most seasons ~200, August = 5 (high coefficient * low std = trouble)
        let stds: Vec<f64> = (0..n_seasons)
            .map(|s| if s == 7 { 5.0 } else { 200.0 })
            .collect();

        let stats: Vec<SeasonalStats> = (0..n_seasons)
            .map(|s| SeasonalStats {
                entity_id: hydro_id,
                stage_id: s as i32,
                mean: 100.0,
                std: stds[s],
            })
            .collect();
        let stats_map: HashMap<(EntityId, usize), &SeasonalStats> = stats
            .iter()
            .enumerate()
            .map(|(s, st)| ((hydro_id, s), st))
            .collect();

        let sigma2_map: HashMap<(EntityId, usize), Vec<f64>> = HashMap::new();

        let reductions = apply_contribution_validation(
            &mut estimates,
            n_seasons,
            &stats_map,
            &sigma2_map,
            None, // max_coeff_magnitude
        );

        // August (season 7) should have been reduced from AR(2).
        // The contribution of lag 2 through the periodic chain may or may not be negative
        // depending on the recursive composition with neighboring months' coefficients.
        // We verify the reduction was applied if any reduction occurred for August.
        let august_order = estimates[7].coefficients.len();

        // Other months should remain unchanged at AR(1) since their contributions
        // are small positive values.
        for (s, est) in estimates.iter().enumerate() {
            if s != 7 {
                assert_eq!(est.coefficients.len(), 1, "season {s} should remain AR(1)");
            }
        }

        // If August was reduced, there should be a reduction record.
        if august_order < 2 {
            let entity_reductions = reductions.get(&hydro_id).expect("should have reductions");
            assert!(
                entity_reductions.iter().any(|r| r.season_id == 7),
                "August reduction should be recorded"
            );
        }
    }

    /// E2-003: All contributions negative forces white-noise fallback.
    ///
    /// AR(1) with phi = -2.0 for all seasons -- every contribution is negative,
    /// so order drops to 0 and residual_std_ratio becomes 1.0.
    #[test]
    fn test_all_negative_fallback_to_white_noise() {
        let hydro_id = EntityId(1);
        let n_seasons = 1;

        let mut estimates = vec![ArCoefficientEstimate {
            hydro_id,
            season_id: 0,
            coefficients: vec![-2.0],
            residual_std_ratio: 0.8,
        }];

        let stats = vec![SeasonalStats {
            entity_id: hydro_id,
            stage_id: 0,
            mean: 50.0,
            std: 10.0,
        }];
        let stats_map: HashMap<(EntityId, usize), &SeasonalStats> =
            stats.iter().map(|s| ((s.entity_id, 0_usize), s)).collect();
        let sigma2_map: HashMap<(EntityId, usize), Vec<f64>> = HashMap::new();

        let _reductions = apply_contribution_validation(
            &mut estimates,
            n_seasons,
            &stats_map,
            &sigma2_map,
            None, // max_coeff_magnitude
        );

        assert!(
            estimates[0].coefficients.is_empty(),
            "should fall back to order 0"
        );
        assert!(
            (estimates[0].residual_std_ratio - 1.0).abs() < 1e-10,
            "white-noise residual ratio should be 1.0"
        );
    }

    // ── Phi_1 rejection tests ────────────────────────────────────────────────

    #[test]
    fn phi1_rejection_sets_order_to_zero() {
        let hydro_id = EntityId(1);
        let n_seasons = 2;

        let mut estimates = vec![
            ArCoefficientEstimate {
                hydro_id,
                season_id: 0,
                coefficients: vec![-0.3, 0.5],
                residual_std_ratio: 0.8,
            },
            ArCoefficientEstimate {
                hydro_id,
                season_id: 1,
                coefficients: vec![0.4, 0.2],
                residual_std_ratio: 0.7,
            },
        ];

        let stats = vec![
            SeasonalStats {
                entity_id: hydro_id,
                stage_id: 0,
                mean: 50.0,
                std: 10.0,
            },
            SeasonalStats {
                entity_id: hydro_id,
                stage_id: 1,
                mean: 60.0,
                std: 12.0,
            },
        ];
        let stats_map: HashMap<(EntityId, usize), &SeasonalStats> = stats
            .iter()
            .enumerate()
            .map(|(i, s)| ((s.entity_id, i), s))
            .collect();
        let sigma2_map: HashMap<(EntityId, usize), Vec<f64>> = HashMap::new();

        let reductions =
            apply_contribution_validation(&mut estimates, n_seasons, &stats_map, &sigma2_map, None);

        // Season 0 should have been rejected (negative phi_1).
        assert!(
            estimates[0].coefficients.is_empty(),
            "season 0 should be cleared to order 0"
        );
        assert!(
            (estimates[0].residual_std_ratio - 1.0).abs() < 1e-10,
            "season 0 residual_std_ratio should be 1.0"
        );

        // Season 1 should be unchanged.
        assert_eq!(
            estimates[1].coefficients,
            vec![0.4, 0.2],
            "season 1 should be unchanged"
        );

        // Verify reduction entry for season 0.
        let hydro_reductions = reductions.get(&hydro_id).expect("should have reductions");
        let r = hydro_reductions
            .iter()
            .find(|r| r.season_id == 0)
            .expect("should have reduction for season 0");
        assert_eq!(r.original_order, 2);
        assert_eq!(r.reduced_order, 0);
        assert!(r.contributions.is_empty());
    }

    #[test]
    fn phi1_rejection_before_contribution_analysis() {
        // A season with phi_1 = -0.01 and phi_2 = 0.5 with uniform std.
        // The contribution analysis would NOT catch this (contributions may
        // be non-negative), but the phi_1 gate fires first.
        let hydro_id = EntityId(1);
        let n_seasons = 1;

        let mut estimates = vec![ArCoefficientEstimate {
            hydro_id,
            season_id: 0,
            coefficients: vec![-0.01, 0.5],
            residual_std_ratio: 0.8,
        }];

        let stats = vec![SeasonalStats {
            entity_id: hydro_id,
            stage_id: 0,
            mean: 50.0,
            std: 10.0,
        }];
        let stats_map: HashMap<(EntityId, usize), &SeasonalStats> =
            stats.iter().map(|s| ((s.entity_id, 0_usize), s)).collect();
        let sigma2_map: HashMap<(EntityId, usize), Vec<f64>> = HashMap::new();

        let reductions =
            apply_contribution_validation(&mut estimates, n_seasons, &stats_map, &sigma2_map, None);

        // Should be rejected by phi_1 gate.
        assert!(
            estimates[0].coefficients.is_empty(),
            "phi_1 = -0.01 should trigger rejection"
        );
        assert!(
            reductions.contains_key(&hydro_id),
            "should have a reduction entry"
        );
    }

    #[test]
    fn phi1_zero_is_not_rejected() {
        let hydro_id = EntityId(1);
        let n_seasons = 1;

        let mut estimates = vec![ArCoefficientEstimate {
            hydro_id,
            season_id: 0,
            coefficients: vec![0.0, 0.3],
            residual_std_ratio: 0.8,
        }];

        let stats = vec![SeasonalStats {
            entity_id: hydro_id,
            stage_id: 0,
            mean: 50.0,
            std: 10.0,
        }];
        let stats_map: HashMap<(EntityId, usize), &SeasonalStats> =
            stats.iter().map(|s| ((s.entity_id, 0_usize), s)).collect();
        let sigma2_map: HashMap<(EntityId, usize), Vec<f64>> = HashMap::new();

        let _reductions =
            apply_contribution_validation(&mut estimates, n_seasons, &stats_map, &sigma2_map, None);

        // phi_1 = 0.0 is NOT negative, so it should pass to contribution analysis.
        assert_eq!(
            estimates[0].coefficients.len(),
            2,
            "phi_1 = 0.0 should not be rejected"
        );
    }

    #[test]
    fn phi1_rejection_interacts_with_magnitude_bound() {
        // phi_1 = -50.0 is both negative and above any reasonable magnitude bound.
        // The magnitude-bound pre-pass should fire first, and the phi_1 check
        // should see the already-cleared vector and skip.
        let hydro_id = EntityId(1);
        let n_seasons = 1;

        let mut estimates = vec![ArCoefficientEstimate {
            hydro_id,
            season_id: 0,
            coefficients: vec![-50.0, 0.3],
            residual_std_ratio: 0.8,
        }];

        let stats = vec![SeasonalStats {
            entity_id: hydro_id,
            stage_id: 0,
            mean: 50.0,
            std: 10.0,
        }];
        let stats_map: HashMap<(EntityId, usize), &SeasonalStats> =
            stats.iter().map(|s| ((s.entity_id, 0_usize), s)).collect();
        let sigma2_map: HashMap<(EntityId, usize), Vec<f64>> = HashMap::new();

        let reductions = apply_contribution_validation(
            &mut estimates,
            n_seasons,
            &stats_map,
            &sigma2_map,
            Some(10.0), // magnitude bound that catches -50.0
        );

        // The season should be cleared (by magnitude bound).
        assert!(
            estimates[0].coefficients.is_empty(),
            "should be cleared to order 0"
        );

        // Only ONE reduction entry should exist (from magnitude bound, NOT doubled).
        let hydro_reductions = reductions.get(&hydro_id).expect("should have reductions");
        assert_eq!(
            hydro_reductions.len(),
            1,
            "should have exactly 1 reduction entry (magnitude bound only)"
        );
    }

    // ── Iterative PACF reduction tests ──────────────────────────────────────

    /// Generate synthetic observations for a single-season AR(p) process.
    /// Uses a fixed seed for reproducibility.
    #[allow(clippy::cast_precision_loss, clippy::cast_lossless)] // u64 >> 33 fits in f64; u32::MAX fits in f64
    fn generate_ar_observations(coefficients: &[f64], n: usize) -> Vec<f64> {
        let p = coefficients.len();
        let mut values = vec![0.0_f64; n + p];
        // Simple deterministic pseudo-noise using a linear congruential generator.
        let mut seed: u64 = 42;
        for i in p..(n + p) {
            seed = seed.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            let noise = ((seed >> 33) as f64 / (u32::MAX as f64) - 0.5) * 2.0;
            let mut val = noise;
            for (j, c) in coefficients.iter().enumerate() {
                val += c * values[i - j - 1];
            }
            values[i] = val;
        }
        values[p..].to_vec()
    }

    #[test]
    fn iterative_reduction_terminates_at_zero() {
        // Construct a case where contributions fail at every order,
        // forcing the loop to terminate at order 0.
        let hydro_id = EntityId(1);
        let n_seasons = 1;

        // Coefficients that produce negative contributions at order 2.
        // phi = [0.3, -5.0] -> contribution at lag 2 = 0.3*0.3 + (-5.0) < 0
        // After reducing max_order to 1 and re-running PACF, if PACF selects
        // order 1, contributions at order 1 are just 0.3 (positive).
        // But if we use coefficients that ALWAYS produce negative contributions,
        // the order will reach 0.
        //
        // Use phi = [-0.5] -> phi_1 is negative, caught by phi_1 gate directly.
        // Instead, use a scenario where contribution analysis fails at every order.
        //
        // We test this via the direct function with synthetic data designed
        // so that PACF at each reduced max_order still selects an order
        // that fails contributions.
        //
        // Simplest approach: use the apply_contribution_validation path
        // (for Fixed method) to verify order-0 termination.
        let mut estimates = vec![ArCoefficientEstimate {
            hydro_id,
            season_id: 0,
            coefficients: vec![0.3, -0.8],
            residual_std_ratio: 0.8,
        }];

        let stats = vec![SeasonalStats {
            entity_id: hydro_id,
            stage_id: 0,
            mean: 50.0,
            std: 10.0,
        }];
        let stats_map: HashMap<(EntityId, usize), &SeasonalStats> =
            stats.iter().map(|s| ((s.entity_id, 0_usize), s)).collect();
        let sigma2_map: HashMap<(EntityId, usize), Vec<f64>> = HashMap::new();

        let reductions =
            apply_contribution_validation(&mut estimates, n_seasons, &stats_map, &sigma2_map, None);

        // The loop should terminate (possibly at a reduced order or order 0).
        // The key assertion is that it terminates and doesn't infinite-loop.
        assert!(
            estimates[0].coefficients.len() < 2,
            "order should be reduced from 2; got {}",
            estimates[0].coefficients.len()
        );

        // Should have at least one reduction entry.
        assert!(
            reductions.contains_key(&hydro_id),
            "should have a reduction entry"
        );
    }

    #[test]
    fn iterative_reduction_only_affects_failing_seasons() {
        // Two seasons: season 0 has negative contributions, season 1 passes.
        let hydro_id = EntityId(1);
        let n_seasons = 2;

        let mut estimates = vec![
            ArCoefficientEstimate {
                hydro_id,
                season_id: 0,
                // phi = [0.3, -0.8]: contribution at lag 2 is negative.
                coefficients: vec![0.3, -0.8],
                residual_std_ratio: 0.8,
            },
            ArCoefficientEstimate {
                hydro_id,
                season_id: 1,
                // phi = [0.4, 0.2]: all contributions positive.
                coefficients: vec![0.4, 0.2],
                residual_std_ratio: 0.7,
            },
        ];

        let stats = vec![
            SeasonalStats {
                entity_id: hydro_id,
                stage_id: 0,
                mean: 50.0,
                std: 10.0,
            },
            SeasonalStats {
                entity_id: hydro_id,
                stage_id: 1,
                mean: 60.0,
                std: 10.0,
            },
        ];
        let stats_map: HashMap<(EntityId, usize), &SeasonalStats> = stats
            .iter()
            .enumerate()
            .map(|(i, s)| ((s.entity_id, i), s))
            .collect();
        let sigma2_map: HashMap<(EntityId, usize), Vec<f64>> = HashMap::new();

        let _reductions =
            apply_contribution_validation(&mut estimates, n_seasons, &stats_map, &sigma2_map, None);

        // Season 0 should have been reduced.
        assert!(
            estimates[0].coefficients.len() < 2,
            "season 0 order should be reduced from 2; got {}",
            estimates[0].coefficients.len()
        );

        // Season 1 should remain unchanged at order 2.
        assert_eq!(
            estimates[1].coefficients,
            vec![0.4, 0.2],
            "season 1 should be unchanged"
        );
    }

    #[test]
    fn iterative_pacf_reduction_with_synthetic_observations() {
        // Test the iterative_pacf_reduction function directly with
        // synthetic observations. Generate data from a known AR process,
        // then artificially set coefficients that will fail contribution
        // analysis to trigger re-selection.
        let hydro_id = EntityId(1);
        let n_seasons = 1;

        // Generate enough observations for PACF to work.
        let obs = generate_ar_observations(&[0.5, 0.2], 100);

        let mut group_obs: HashMap<(EntityId, usize), Vec<f64>> = HashMap::new();
        group_obs.insert((hydro_id, 0), obs);

        let stats = vec![SeasonalStats {
            entity_id: hydro_id,
            stage_id: 0,
            mean: 0.0,
            std: 1.0,
        }];
        let stats_map: HashMap<(EntityId, usize), &SeasonalStats> =
            stats.iter().map(|s| ((s.entity_id, 0_usize), s)).collect();

        // Start with coefficients that have a bad high-order term.
        // The iterative loop should reduce the order.
        let mut estimates = vec![ArCoefficientEstimate {
            hydro_id,
            season_id: 0,
            coefficients: vec![0.5, 0.2, -5.0], // order 3, lag 3 will fail
            residual_std_ratio: 0.8,
        }];

        let reductions = iterative_pacf_reduction(
            &mut estimates,
            n_seasons,
            &[hydro_id],
            &group_obs,
            &stats_map,
            3, // initial_max_order
            1.96,
            None,
        );

        // The function should have re-estimated using PACF at a reduced
        // max_order. The exact final order depends on the PACF selection,
        // but it should be less than the original 3.
        assert!(
            estimates[0].coefficients.len() < 3,
            "order should be reduced from 3; got {}",
            estimates[0].coefficients.len()
        );

        // Should have at least one reduction entry.
        assert!(
            reductions.contains_key(&hydro_id),
            "should have a reduction entry"
        );
    }

    #[test]
    fn fixed_path_uses_truncation_not_reselection() {
        // Verify that the Fixed order selection path still uses
        // apply_contribution_validation (truncation), not iterative PACF.
        // We check this by verifying the behavior matches truncation semantics.
        let hydro_id = EntityId(1);
        let n_seasons = 1;

        // phi = [0.5, 0.2, -0.8]: contribution at lag 3 is negative.
        // Truncation would give order 2 (find_max_valid_order).
        // Iterative PACF would re-run PACF at max_order=2 and possibly
        // select a different order.
        let mut estimates = vec![ArCoefficientEstimate {
            hydro_id,
            season_id: 0,
            coefficients: vec![0.5, 0.2, -0.8],
            residual_std_ratio: 0.8,
        }];

        let stats = vec![SeasonalStats {
            entity_id: hydro_id,
            stage_id: 0,
            mean: 50.0,
            std: 10.0,
        }];
        let stats_map: HashMap<(EntityId, usize), &SeasonalStats> =
            stats.iter().map(|s| ((s.entity_id, 0_usize), s)).collect();
        let sigma2_map: HashMap<(EntityId, usize), Vec<f64>> = HashMap::new();

        let reductions =
            apply_contribution_validation(&mut estimates, n_seasons, &stats_map, &sigma2_map, None);

        // With truncation, the Fixed path truncates coefficients at the
        // first negative contribution position, which is at lag 3 (index 2).
        // So max_valid_order should be 2 or less.
        let final_order = estimates[0].coefficients.len();
        assert!(
            final_order <= 2,
            "Fixed path should truncate; got order {final_order}"
        );

        // Verify a reduction was recorded.
        assert!(
            reductions.contains_key(&hydro_id),
            "should have a reduction entry"
        );
        let r = &reductions[&hydro_id][0];
        assert_eq!(r.original_order, 3);
        // The reduced order should be from find_max_valid_order (truncation).
        assert!(r.reduced_order <= 2, "truncation should produce order <= 2");
    }

    // ── Combined strategy and reduction reason tests ─────────────────────────

    #[test]
    fn combined_strategies_produce_correct_reduction_reasons() {
        let h1 = EntityId(1);
        let h2 = EntityId(2);
        let n_seasons = 2;

        let mut estimates = vec![
            // H1 S0: negative phi_1 -> Phi1Negative
            ArCoefficientEstimate {
                hydro_id: h1,
                season_id: 0,
                coefficients: vec![-0.3, 0.5],
                residual_std_ratio: 0.8,
            },
            // H1 S1: negative contribution at lag 3 -> NegativeContribution
            ArCoefficientEstimate {
                hydro_id: h1,
                season_id: 1,
                coefficients: vec![0.5, 0.2, -0.8],
                residual_std_ratio: 0.7,
            },
            // H2 S0: magnitude bound -> MagnitudeBound
            ArCoefficientEstimate {
                hydro_id: h2,
                season_id: 0,
                coefficients: vec![50.0],
                residual_std_ratio: 0.9,
            },
            // H2 S1: passes -> no reduction
            ArCoefficientEstimate {
                hydro_id: h2,
                season_id: 1,
                coefficients: vec![0.4, 0.2],
                residual_std_ratio: 0.7,
            },
        ];

        let stats = vec![
            SeasonalStats {
                entity_id: h1,
                stage_id: 0,
                mean: 50.0,
                std: 10.0,
            },
            SeasonalStats {
                entity_id: h1,
                stage_id: 1,
                mean: 60.0,
                std: 10.0,
            },
            SeasonalStats {
                entity_id: h2,
                stage_id: 0,
                mean: 70.0,
                std: 15.0,
            },
            SeasonalStats {
                entity_id: h2,
                stage_id: 1,
                mean: 80.0,
                std: 12.0,
            },
        ];
        let stats_map: HashMap<(EntityId, usize), &SeasonalStats> = stats
            .iter()
            .enumerate()
            .map(|(i, s)| ((s.entity_id, i % 2), s))
            .collect();
        let sigma2_map: HashMap<(EntityId, usize), Vec<f64>> = HashMap::new();

        let reductions = apply_contribution_validation(
            &mut estimates,
            n_seasons,
            &stats_map,
            &sigma2_map,
            Some(10.0),
        );

        // H1 S0: phi_1 negative -> Phi1Negative, order 0
        let h1_reductions = &reductions[&h1];
        let h1_s0 = h1_reductions
            .iter()
            .find(|r| r.season_id == 0)
            .expect("should have reduction for H1 S0");
        assert_eq!(h1_s0.reason, ReductionReason::Phi1Negative);
        assert_eq!(h1_s0.reduced_order, 0);

        // H1 S1: negative contribution -> NegativeContribution
        let h1_s1 = h1_reductions
            .iter()
            .find(|r| r.season_id == 1)
            .expect("should have reduction for H1 S1");
        assert_eq!(h1_s1.reason, ReductionReason::NegativeContribution);

        // H2 S0: magnitude bound -> MagnitudeBound, order 0
        let h2_reductions = &reductions[&h2];
        let h2_s0 = h2_reductions
            .iter()
            .find(|r| r.season_id == 0)
            .expect("should have reduction for H2 S0");
        assert_eq!(h2_s0.reason, ReductionReason::MagnitudeBound);
        assert_eq!(h2_s0.reduced_order, 0);

        // H2 S1: no reduction
        assert!(
            !h2_reductions.iter().any(|r| r.season_id == 1),
            "H2 S1 should have no reduction"
        );
    }

    // ── Pre-study stage expansion tests (T005) ──────────────────────────────

    /// Build a Stage with the given parameters, suitable for expansion tests.
    fn make_expansion_stage(
        index: usize,
        id: i32,
        season_id: Option<usize>,
    ) -> cobre_core::temporal::Stage {
        use chrono::NaiveDate;
        use cobre_core::temporal::{
            Block, BlockMode, NoiseMethod, ScenarioSourceConfig, StageRiskConfig, StageStateConfig,
        };

        cobre_core::temporal::Stage {
            index,
            id,
            start_date: NaiveDate::from_ymd_opt(2024, 1, 1).unwrap(),
            end_date: NaiveDate::from_ymd_opt(2024, 2, 1).unwrap(),
            season_id,
            blocks: vec![Block {
                index: 0,
                name: "SINGLE".to_string(),
                duration_hours: 744.0,
            }],
            block_mode: BlockMode::Parallel,
            state_config: StageStateConfig {
                storage: true,
                inflow_lags: false,
            },
            risk_config: StageRiskConfig::Expectation,
            scenario_config: ScenarioSourceConfig {
                branching_factor: 10,
                noise_method: NoiseMethod::Saa,
            },
        }
    }

    #[test]
    fn seasonal_stats_to_rows_includes_prestudy_stages() {
        // 3 study stages (id 0, 1, 2; seasons 0, 1, 2)
        // 2 pre-study stages (id -1, -2; seasons 2, 1)
        let stages = vec![
            make_expansion_stage(0, -2, Some(1)),
            make_expansion_stage(1, -1, Some(2)),
            make_expansion_stage(2, 0, Some(0)),
            make_expansion_stage(3, 1, Some(1)),
            make_expansion_stage(4, 2, Some(2)),
        ];

        let h1 = EntityId(1);
        // SeasonalStats for seasons 0, 1, 2 (stage_id is the first stage
        // with that season).
        let stats = vec![
            SeasonalStats {
                entity_id: h1,
                stage_id: 0,
                mean: 100.0,
                std: 20.0,
            },
            SeasonalStats {
                entity_id: h1,
                stage_id: 1,
                mean: 110.0,
                std: 22.0,
            },
            SeasonalStats {
                entity_id: h1,
                stage_id: 2,
                mean: 120.0,
                std: 24.0,
            },
        ];

        let rows = seasonal_stats_to_rows(&stats, &stages);

        // season 0: stage 0 only
        // season 1: stages -2 and 1
        // season 2: stages -1 and 2
        // Total: 1 + 2 + 2 = 5 rows
        assert_eq!(rows.len(), 5, "expected 5 rows (3 study + 2 pre-study)");

        // Verify pre-study rows exist with negative stage_ids.
        let prestudy_rows: Vec<_> = rows.iter().filter(|r| r.stage_id < 0).collect();
        assert_eq!(
            prestudy_rows.len(),
            2,
            "expected 2 pre-study rows, got {}",
            prestudy_rows.len()
        );

        // stage_id = -2 has season 1 -> (mean=110, std=22).
        let neg2 = rows.iter().find(|r| r.stage_id == -2).expect("row for -2");
        assert!((neg2.mean_m3s - 110.0).abs() < f64::EPSILON);
        assert!((neg2.std_m3s - 22.0).abs() < f64::EPSILON);

        // stage_id = -1 has season 2 -> (mean=120, std=24).
        let neg1 = rows.iter().find(|r| r.stage_id == -1).expect("row for -1");
        assert!((neg1.mean_m3s - 120.0).abs() < f64::EPSILON);
        assert!((neg1.std_m3s - 24.0).abs() < f64::EPSILON);

        // Rows should be sorted by (hydro_id, stage_id).
        for w in rows.windows(2) {
            assert!(
                (w[0].hydro_id.0, w[0].stage_id) <= (w[1].hydro_id.0, w[1].stage_id),
                "rows not sorted"
            );
        }
    }

    #[test]
    fn ar_estimates_to_rows_includes_prestudy_stages() {
        // Same stage layout as test 1.
        let stages = vec![
            make_expansion_stage(0, -2, Some(1)),
            make_expansion_stage(1, -1, Some(2)),
            make_expansion_stage(2, 0, Some(0)),
            make_expansion_stage(3, 1, Some(1)),
            make_expansion_stage(4, 2, Some(2)),
        ];

        let h1 = EntityId(1);
        // AR(1) estimates for seasons 0, 1, 2.
        let ar_estimates = vec![
            ArCoefficientEstimate {
                hydro_id: h1,
                season_id: 0,
                coefficients: vec![0.3],
                residual_std_ratio: 0.9,
            },
            ArCoefficientEstimate {
                hydro_id: h1,
                season_id: 1,
                coefficients: vec![0.4],
                residual_std_ratio: 0.85,
            },
            ArCoefficientEstimate {
                hydro_id: h1,
                season_id: 2,
                coefficients: vec![0.5],
                residual_std_ratio: 0.8,
            },
        ];

        let rows = ar_estimates_to_rows(&ar_estimates, &stages);

        // season 0 -> 1 stage (id 0): 1 row
        // season 1 -> 2 stages (ids -2, 1): 2 rows
        // season 2 -> 2 stages (ids -1, 2): 2 rows
        // Total: 5 rows (each AR(1), so 1 coefficient row per stage)
        assert_eq!(rows.len(), 5, "expected 5 rows");

        // Pre-study coefficient rows exist.
        let prestudy_rows: Vec<_> = rows.iter().filter(|r| r.stage_id < 0).collect();
        assert_eq!(prestudy_rows.len(), 2);

        // stage_id = -2 is season 1, coefficient = 0.4.
        let neg2 = rows.iter().find(|r| r.stage_id == -2).expect("row for -2");
        assert!((neg2.coefficient - 0.4).abs() < f64::EPSILON);
        assert!((neg2.residual_std_ratio - 0.85).abs() < f64::EPSILON);

        // stage_id = -1 is season 2, coefficient = 0.5.
        let neg1 = rows.iter().find(|r| r.stage_id == -1).expect("row for -1");
        assert!((neg1.coefficient - 0.5).abs() < f64::EPSILON);
        assert!((neg1.residual_std_ratio - 0.8).abs() < f64::EPSILON);
    }

    #[test]
    fn full_estimation_produces_prestudy_inflow_models() {
        use cobre_io::scenarios::assemble_inflow_models;

        // Build stages with 2 pre-study + 3 study.
        let stages = vec![
            make_expansion_stage(0, -2, Some(1)),
            make_expansion_stage(1, -1, Some(2)),
            make_expansion_stage(2, 0, Some(0)),
            make_expansion_stage(3, 1, Some(1)),
            make_expansion_stage(4, 2, Some(2)),
        ];

        let h1 = EntityId(1);

        // Build stats rows (including pre-study).
        let stats = vec![
            SeasonalStats {
                entity_id: h1,
                stage_id: 0,
                mean: 100.0,
                std: 20.0,
            },
            SeasonalStats {
                entity_id: h1,
                stage_id: 1,
                mean: 110.0,
                std: 22.0,
            },
            SeasonalStats {
                entity_id: h1,
                stage_id: 2,
                mean: 120.0,
                std: 24.0,
            },
        ];
        let stats_rows = seasonal_stats_to_rows(&stats, &stages);

        // Build coefficient rows.
        let ar_ests = vec![
            ArCoefficientEstimate {
                hydro_id: h1,
                season_id: 0,
                coefficients: vec![0.3],
                residual_std_ratio: 0.9,
            },
            ArCoefficientEstimate {
                hydro_id: h1,
                season_id: 1,
                coefficients: vec![0.4],
                residual_std_ratio: 0.85,
            },
            ArCoefficientEstimate {
                hydro_id: h1,
                season_id: 2,
                coefficients: vec![0.5],
                residual_std_ratio: 0.8,
            },
        ];
        let coeff_rows = ar_estimates_to_rows(&ar_ests, &stages);

        // Assemble into InflowModel.
        let inflow_models =
            assemble_inflow_models(stats_rows, coeff_rows).expect("assembly should succeed");

        // Should have entries for pre-study stages.
        assert!(
            inflow_models.iter().any(|m| m.stage_id < 0),
            "expected pre-study InflowModel entries (negative stage_id)"
        );

        // Pre-study models should have correct stats from their season.
        let prestudy_neg2 = inflow_models
            .iter()
            .find(|m| m.stage_id == -2)
            .expect("InflowModel for stage -2");
        assert!((prestudy_neg2.mean_m3s - 110.0).abs() < f64::EPSILON);
        assert!((prestudy_neg2.std_m3s - 22.0).abs() < f64::EPSILON);
    }
}