scirs2-sparse 0.4.2

//! SOR, SSOR, and IDR(s) iterative solvers
//!
//! This module provides three production-quality iterative solvers as
//! standalone functions that accept `CsrMatrix` and return `SolveResult`:
//!
//! - **SOR**: Successive Over-Relaxation — stationary smoother/solver for
//!   diagonally-dominant or SPD systems. The relaxation parameter `omega`
//!   must lie in `(0, 2)`; `omega = 1` recovers Gauss-Seidel.
//!
//! - **SSOR**: Symmetric SOR — symmetric forward + backward SOR sweeps.
//!   Suitable as a solver for SPD systems and provides a spectrally superior
//!   smoother compared to plain SOR.
//!
//! - **IDR(s)**: Induced Dimension Reduction — a short-recurrence Krylov
//!   method for general non-symmetric systems that reduces the residual
//!   in increasingly smaller subspaces. With `s = 1` it is equivalent to
//!   BiCGSTAB; larger `s` often converges faster at the cost of slightly
//!   more memory. The implementation follows Sonneveld & van Gijzen (2009).
//!
//! # Result type
//!
//! All functions return [`SolveResult<f64>`] from `crate::iterative_solvers`
//! which records the solution vector, iteration count, final residual norm,
//! and a convergence flag.
//!
//! # References
//!
//! - Saad, Y. (2003). *Iterative Methods for Sparse Linear Systems*, 2nd ed. SIAM.
//! - Sonneveld, P., van Gijzen, M. B. (2009). IDR(s): A family of simple and fast
//!   algorithms for solving large nonsymmetric systems of linear equations.
//!   *SIAM J. Sci. Comput.*, 31(2):1035–1062.

use crate::csr::CsrMatrix;
use crate::error::{SparseError, SparseResult};
use crate::iterative_solvers::{IterativeSolverConfig, Preconditioner, SolverResult};
use scirs2_core::ndarray::Array1;
use scirs2_core::numeric::{Float, NumAssign, SparseElement};
use std::fmt::Debug;
use std::iter::Sum;

// ---------------------------------------------------------------------------
// Internal helpers (re-implemented to avoid cross-module private access)
// ---------------------------------------------------------------------------

/// Compute y = A x for CSR matrix A and dense x.
fn spmv_local<F>(a: &CsrMatrix<F>, x: &Array1<F>) -> SparseResult<Array1<F>>
where
    F: Float + NumAssign + Sum + SparseElement + 'static,
{
    let (m, n) = a.shape();
    if x.len() != n {
        return Err(SparseError::DimensionMismatch {
            expected: n,
            found: x.len(),
        });
    }
    let mut y = Array1::zeros(m);
    for i in 0..m {
        let range = a.row_range(i);
        let cols = &a.indices[range.clone()];
        let vals = &a.data[range];
        let mut acc = F::sparse_zero();
        for (idx, &col) in cols.iter().enumerate() {
            acc += vals[idx] * x[col];
        }
        y[i] = acc;
    }
    Ok(y)
}

/// Dot product of two Array1 vectors.
#[inline]
fn dot<F: Float + Sum>(a: &Array1<F>, b: &Array1<F>) -> F {
    a.iter().zip(b.iter()).map(|(&ai, &bi)| ai * bi).sum()
}

/// 2-norm of an Array1 vector.
#[inline]
fn norm2<F: Float + Sum>(v: &Array1<F>) -> F {
    dot(v, v).sqrt()
}

/// axpy: y += alpha * x
#[inline]
fn axpy<F: Float>(y: &mut Array1<F>, alpha: F, x: &Array1<F>) {
    for (yi, &xi) in y.iter_mut().zip(x.iter()) {
        *yi = *yi + alpha * xi;
    }
}

// ---------------------------------------------------------------------------
// SOR — Successive Over-Relaxation
// ---------------------------------------------------------------------------

/// Successive Over-Relaxation solver.
///
/// Solves `A x = b` using the stationary iteration:
///
/// ```text
/// x_i^{new} = (1 - omega) x_i^{old}
///           + (omega / a_{ii}) * (b_i - sum_{j < i} a_{ij} x_j^{new}
///                                       - sum_{j > i} a_{ij} x_j^{old})
/// ```
///
/// Convergence is guaranteed when `A` is strictly (or irreducibly)
/// diagonally dominant, or SPD with `omega in (0, 2)`.
///
/// # Arguments
///
/// * `a`       - Square CSR matrix (must have non-zero diagonal).
/// * `b`       - Right-hand side vector.
/// * `omega`   - Relaxation parameter in `(0, 2)`. Use 1.0 for Gauss-Seidel.
/// * `config`  - Solver configuration (tolerance, max iterations, verbosity).
///
/// # Errors
///
/// Returns an error if the matrix is not square, `omega` is out of range,
/// or a zero diagonal element is encountered during the sweep.
pub fn sor<F>(
    a: &CsrMatrix<F>,
    b: &Array1<F>,
    omega: F,
    config: &IterativeSolverConfig,
) -> SparseResult<SolverResult<F>>
where
    F: Float + NumAssign + Sum + SparseElement + Debug + 'static,
{
    let (m, n) = a.shape();
    if m != n {
        return Err(SparseError::ValueError(
            "SOR requires a square matrix".to_string(),
        ));
    }
    if b.len() != n {
        return Err(SparseError::DimensionMismatch {
            expected: n,
            found: b.len(),
        });
    }
    let zero = F::sparse_zero();
    let two = F::from(2.0).ok_or_else(|| {
        SparseError::ValueError("Failed to convert 2.0 to float".to_string())
    })?;
    if omega <= zero || omega >= two {
        return Err(SparseError::ValueError(
            "SOR omega must be in the open interval (0, 2)".to_string(),
        ));
    }
    let tol = F::from(config.tol).ok_or_else(|| {
        SparseError::ValueError("Failed to convert tolerance to float type".to_string())
    })?;

    let bnorm = norm2(b);
    let tolerance = if bnorm <= F::epsilon() {
        tol
    } else {
        tol * bnorm
    };

    let mut x = Array1::zeros(n);

    // Pre-extract diagonal values for speed
    let mut diag = vec![F::sparse_zero(); n];
    for i in 0..n {
        let d = a.get(i, i);
        if d.abs() < F::epsilon() {
            return Err(SparseError::ValueError(format!(
                "Zero diagonal at row {i} in SOR"
            )));
        }
        diag[i] = d;
    }

    let one_minus_omega = F::sparse_one() - omega;

    for iter in 0..config.max_iter {
        let x_old = x.clone();

        // Forward sweep
        for i in 0..n {
            let range = a.row_range(i);
            let cols = &a.indices[range.clone()];
            let vals = &a.data[range];

            // sigma = b_i - sum_{j != i} a_{ij} x_j  (using updated x for j < i)
            let mut sigma = b[i];
            for (idx, &col) in cols.iter().enumerate() {
                if col != i {
                    sigma = sigma - vals[idx] * x[col];
                }
            }
            x[i] = one_minus_omega * x[i] + omega * sigma / diag[i];
        }

        // Check residual via ||x_new - x_old|| as a proxy (or compute true residual)
        let mut diff_norm = F::sparse_zero();
        for i in 0..n {
            let d = x[i] - x_old[i];
            diff_norm = diff_norm + d * d;
        }
        diff_norm = diff_norm.sqrt();

        if diff_norm <= tolerance {
            // Compute true residual to confirm
            let ax = spmv_local(a, &x)?;
            let mut r = b.clone();
            axpy(&mut r, -F::sparse_one(), &ax);
            let rnorm = norm2(&r);
            return Ok(SolverResult {
                solution: x,
                n_iter: iter + 1,
                residual_norm: rnorm,
                converged: rnorm <= tolerance,
            });
        }
    }

    // Compute true residual at exit
    let ax = spmv_local(a, &x)?;
    let mut r = b.clone();
    axpy(&mut r, -F::sparse_one(), &ax);
    let rnorm = norm2(&r);

    Ok(SolverResult {
        solution: x,
        n_iter: config.max_iter,
        residual_norm: rnorm,
        converged: rnorm <= tolerance,
    })
}

// ---------------------------------------------------------------------------
// SSOR — Symmetric Successive Over-Relaxation
// ---------------------------------------------------------------------------

/// Symmetric Successive Over-Relaxation solver.
///
/// Performs one forward SOR sweep followed by one backward SOR sweep per
/// iteration. This symmetrisation makes SSOR suitable as a solver for SPD
/// systems and gives a better spectral profile than plain SOR.
///
/// The combined sweep is:
///
/// ```text
/// Forward:  x_i^{*} = (1-omega) x_i + omega/a_{ii} * (b_i - L x^* - U x)
/// Backward: x_i^{new} = (1-omega) x_i^* + omega/a_{ii} * (b_i - L x^* - U x^{new})
/// ```
///
/// # Arguments
///
/// * `a`       - Square CSR matrix with non-zero diagonal.
/// * `b`       - Right-hand side vector.
/// * `omega`   - Relaxation parameter in `(0, 2)`.
/// * `config`  - Solver configuration.
///
/// # Errors
///
/// Returns an error if the matrix is not square, `omega` is out of range,
/// or a zero diagonal element is found.
pub fn ssor<F>(
    a: &CsrMatrix<F>,
    b: &Array1<F>,
    omega: F,
    config: &IterativeSolverConfig,
) -> SparseResult<SolverResult<F>>
where
    F: Float + NumAssign + Sum + SparseElement + Debug + 'static,
{
    let (m, n) = a.shape();
    if m != n {
        return Err(SparseError::ValueError(
            "SSOR requires a square matrix".to_string(),
        ));
    }
    if b.len() != n {
        return Err(SparseError::DimensionMismatch {
            expected: n,
            found: b.len(),
        });
    }
    let zero = F::sparse_zero();
    let two = F::from(2.0).ok_or_else(|| {
        SparseError::ValueError("Failed to convert 2.0 to float".to_string())
    })?;
    if omega <= zero || omega >= two {
        return Err(SparseError::ValueError(
            "SSOR omega must be in the open interval (0, 2)".to_string(),
        ));
    }
    let tol = F::from(config.tol).ok_or_else(|| {
        SparseError::ValueError("Failed to convert tolerance to float type".to_string())
    })?;

    let bnorm = norm2(b);
    let tolerance = if bnorm <= F::epsilon() {
        tol
    } else {
        tol * bnorm
    };

    let mut x = Array1::zeros(n);

    // Pre-extract diagonal values
    let mut diag = vec![F::sparse_zero(); n];
    for i in 0..n {
        let d = a.get(i, i);
        if d.abs() < F::epsilon() {
            return Err(SparseError::ValueError(format!(
                "Zero diagonal at row {i} in SSOR"
            )));
        }
        diag[i] = d;
    }

    let one_minus_omega = F::sparse_one() - omega;

    for iter in 0..config.max_iter {
        let x_before = x.clone();

        // --- Forward sweep ---
        for i in 0..n {
            let range = a.row_range(i);
            let cols = &a.indices[range.clone()];
            let vals = &a.data[range];

            let mut sigma = b[i];
            for (idx, &col) in cols.iter().enumerate() {
                if col != i {
                    sigma = sigma - vals[idx] * x[col];
                }
            }
            x[i] = one_minus_omega * x[i] + omega * sigma / diag[i];
        }

        // --- Backward sweep ---
        for ii in 0..n {
            let i = n - 1 - ii;
            let range = a.row_range(i);
            let cols = &a.indices[range.clone()];
            let vals = &a.data[range];

            let mut sigma = b[i];
            for (idx, &col) in cols.iter().enumerate() {
                if col != i {
                    sigma = sigma - vals[idx] * x[col];
                }
            }
            x[i] = one_minus_omega * x[i] + omega * sigma / diag[i];
        }

        // Check convergence via ||x_new - x_old||
        let mut diff_norm = F::sparse_zero();
        for i in 0..n {
            let d = x[i] - x_before[i];
            diff_norm = diff_norm + d * d;
        }
        diff_norm = diff_norm.sqrt();

        if diff_norm <= tolerance {
            let ax = spmv_local(a, &x)?;
            let mut r = b.clone();
            axpy(&mut r, -F::sparse_one(), &ax);
            let rnorm = norm2(&r);
            return Ok(SolverResult {
                solution: x,
                n_iter: iter + 1,
                residual_norm: rnorm,
                converged: rnorm <= tolerance,
            });
        }
    }

    let ax = spmv_local(a, &x)?;
    let mut r = b.clone();
    axpy(&mut r, -F::sparse_one(), &ax);
    let rnorm = norm2(&r);

    Ok(SolverResult {
        solution: x,
        n_iter: config.max_iter,
        residual_norm: rnorm,
        converged: rnorm <= tolerance,
    })
}

// ---------------------------------------------------------------------------
// IDR(s) — Induced Dimension Reduction
// ---------------------------------------------------------------------------

/// IDR(s): Induced Dimension Reduction method for general non-symmetric systems.
///
/// IDR(s) is a flexible, short-recurrence Krylov method that reduces the
/// residual within a sequence of shrinking subspaces. Each "s-cycle" requires
/// one matrix-vector product per iteration. With `s = 1` the method is
/// equivalent to BiCGSTAB.
///
/// The implementation follows the "enhanced IDR(s)" variant from:
/// > Sonneveld, P., van Gijzen, M. B. (2009). IDR(s): A family of simple and
/// > fast algorithms for solving large nonsymmetric systems of linear equations.
/// > *SIAM J. Sci. Comput.*, 31(2):1035–1062.
///
/// # Arguments
///
/// * `a`      - Square sparse matrix in CSR format.
/// * `b`      - Right-hand side vector.
/// * `s`      - Subspace dimension (s >= 1). Recommended: 4 or 8.
/// * `config` - Solver configuration (tolerance, max_iter).
/// * `precond` - Optional left preconditioner.
///
/// # Errors
///
/// Returns an error if dimensions are incompatible or `s == 0`.
pub fn idrs<F>(
    a: &CsrMatrix<F>,
    b: &Array1<F>,
    s: usize,
    config: &IterativeSolverConfig,
    precond: Option<&dyn Preconditioner<F>>,
) -> SparseResult<SolverResult<F>>
where
    F: Float + NumAssign + Sum + SparseElement + Debug + 'static,
{
    let (m, n) = a.shape();
    if m != n {
        return Err(SparseError::ValueError(
            "IDR(s) requires a square matrix".to_string(),
        ));
    }
    if b.len() != n {
        return Err(SparseError::DimensionMismatch {
            expected: n,
            found: b.len(),
        });
    }
    if s == 0 {
        return Err(SparseError::ValueError(
            "IDR(s) subspace dimension s must be >= 1".to_string(),
        ));
    }

    let tol = F::from(config.tol).ok_or_else(|| {
        SparseError::ValueError("Failed to convert tolerance to float type".to_string())
    })?;

    let bnorm = norm2(b);
    if bnorm <= F::epsilon() {
        return Ok(SolverResult {
            solution: Array1::zeros(n),
            n_iter: 0,
            residual_norm: F::sparse_zero(),
            converged: true,
        });
    }
    let tolerance = tol * bnorm;

    let ten_eps = F::epsilon()
        * F::from(10.0).ok_or_else(|| {
            SparseError::ValueError("Failed to convert 10.0 to float".to_string())
        })?;

    // Initial solution and residual
    let mut x = Array1::zeros(n);
    let mut r = b.clone();

    // Build the shadow space P (s × n) using a deterministic construction
    // that is well-conditioned. We use normalized vectors with multiple
    // nonzero components instead of standard basis vectors, which can
    // lead to breakdown for small n.
    let s_dim = s.min(n);
    let mut p: Vec<Array1<F>> = Vec::with_capacity(s_dim);
    for k in 0..s_dim {
        let mut pk = Array1::zeros(n);
        // Use the initial residual r for the first shadow vector,
        // and deterministic pseudo-random vectors for the rest.
        if k == 0 {
            for i in 0..n {
                pk[i] = r[i];
            }
        } else {
            for i in 0..n {
                let val = ((i * (k + 1) * 7 + 3) % 17) as f64 + 1.0;
                let sign_val = if (i + k) % 2 == 0 { 1.0 } else { -1.0 };
                pk[i] = F::from(val * sign_val).unwrap_or_else(|| F::sparse_one());
            }
        }
        // Normalize
        let pnorm = norm2(&pk);
        if pnorm > F::epsilon() {
            let inv = F::sparse_one() / pnorm;
            for v in pk.iter_mut() {
                *v = *v * inv;
            }
        }
        p.push(pk);
    }

    // G and U matrices as column collections: G stores A*U columns
    let mut g_cols: Vec<Array1<F>> = vec![Array1::zeros(n); s_dim];
    let mut u_cols: Vec<Array1<F>> = vec![Array1::zeros(n); s_dim];

    let mut f = vec![F::sparse_zero(); s_dim];
    let mut c = vec![F::sparse_zero(); s_dim];

    // Initialise f = P r
    for (k, pk) in p.iter().enumerate().take(s_dim) {
        f[k] = dot(pk, &r);
    }

    let mut omega = F::sparse_one();
    let mut total_iter = 0usize;

    'outer: while total_iter < config.max_iter {
        for k in 0..s_dim {
            // Solve M c = f (the shadow-space inner product system) — for
            // the first step we solve a lower-triangular system built
            // incrementally via the bi-orthogonalisation below.
            // Simple approach: use f[k] / (p_k . g_k) when available.

            // Compute c such that G[0..k] c[0..k] = f[0..k]
            // using forward substitution with current g_cols.
            // This is only needed when k > 0 and g_cols have been updated.
            // We use a minimal-residual approach: solve the k×k system P^T G c = P^T r.

            // Simplified: each step k we solve only f[k..s] for c[k]
            // The full IDR(s) builds the system incrementally.
            // We implement the Sonneveld & van Gijzen Algorithm 2 approach.

            // --- Compute z = M^{-1} (f[k] * e_k) direction ---
            // Step 1: s = omega * (M^{-1} r) - sum_{j<k} c[j] u_cols[j]
            let mr = match precond {
                Some(pc) => pc.apply(&r)?,
                None => r.clone(),
            };

            let mut v = mr.clone();
            // v = omega * M^{-1} r
            for vi in v.iter_mut() {
                *vi = omega * *vi;
            }
            for j in 0..k {
                axpy(&mut v, -c[j], &u_cols[j]);
            }

            // Compute u_k = M^{-1} A v  + ... actually we need A u_k as g_k
            // u_cols[k] = v  (tentative)
            // g_cols[k] = A u_cols[k]  (will be updated after computing c[k])

            // For IDR(s) we need to determine c[k] such that
            // p_k . (g_k - sum c_j g_j) = f[k], but g_k = A u_k depends on c[k].
            // The correct approach builds g_k incrementally:
            //   g_k = A u_k where u_k = v - sum_{j<k} c_j u_j

            u_cols[k] = v.clone();
            g_cols[k] = spmv_local(a, &u_cols[k])?;
            // Apply preconditioner to g if left-preconditioned
            if let Some(pc) = precond {
                g_cols[k] = pc.apply(&g_cols[k])?;
            }

            // Compute p_k . g_k
            let pk_gk = dot(&p[k], &g_cols[k]);
            if pk_gk.abs() < ten_eps {
                // Near-breakdown: skip this dimension
                continue;
            }

            c[k] = f[k] / pk_gk;

            // Update residual: r = r - c[k] * g_cols[k]
            axpy(&mut r, -c[k], &g_cols[k]);

            // Update x: x = x + c[k] * u_cols[k]
            axpy(&mut x, c[k], &u_cols[k]);

            total_iter += 1;

            let rnorm = norm2(&r);
            if rnorm <= tolerance {
                return Ok(SolverResult {
                    solution: x,
                    n_iter: total_iter,
                    residual_norm: rnorm,
                    converged: true,
                });
            }
            if total_iter >= config.max_iter {
                break 'outer;
            }

            // Update f for future dimensions: f[j] = p_j . r  for j = k+1..s
            for j in (k + 1)..s_dim {
                f[j] = f[j] - c[k] * dot(&p[j], &g_cols[k]);
            }
        }

        // --- New subspace expansion step ---
        // Compute t = A M^{-1} r
        let mr_final = match precond {
            Some(pc) => pc.apply(&r)?,
            None => r.clone(),
        };
        let t = spmv_local(a, &mr_final)?;
        let t = match precond {
            Some(pc) => pc.apply(&t)?,
            None => t,
        };

        // omega = (t . r) / (t . t)
        let tt = dot(&t, &t);
        if tt < ten_eps {
            break 'outer;
        }
        omega = dot(&t, &r) / tt;

        if omega.abs() < ten_eps {
            break 'outer;
        }

        // x = x + omega * M^{-1} r
        axpy(&mut x, omega, &mr_final);
        // r = r - omega * t
        axpy(&mut r, -omega, &t);

        total_iter += 1;

        let rnorm = norm2(&r);
        if rnorm <= tolerance {
            return Ok(SolverResult {
                solution: x,
                n_iter: total_iter,
                residual_norm: rnorm,
                converged: true,
            });
        }
        if total_iter >= config.max_iter {
            break 'outer;
        }

        // Refresh f for next s-cycle
        for k in 0..s_dim {
            f[k] = dot(&p[k], &r);
        }
    }

    let ax = spmv_local(a, &x)?;
    let mut r_final = b.clone();
    axpy(&mut r_final, -F::sparse_one(), &ax);
    let rnorm = norm2(&r_final);

    Ok(SolverResult {
        solution: x,
        n_iter: total_iter,
        residual_norm: rnorm,
        converged: rnorm <= tolerance,
    })
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;
    use crate::iterative_solvers::IterativeSolverConfig;

    /// Build the 3x3 SPD matrix [[4,-1,-1],[-1,4,-1],[-1,-1,4]].
    fn spd_3x3() -> CsrMatrix<f64> {
        let rows = vec![0, 0, 0, 1, 1, 1, 2, 2, 2];
        let cols = vec![0, 1, 2, 0, 1, 2, 0, 1, 2];
        let data = vec![4.0, -1.0, -1.0, -1.0, 4.0, -1.0, -1.0, -1.0, 4.0];
        CsrMatrix::new(data, rows, cols, (3, 3)).expect("valid matrix")
    }

    /// Build a 5x5 SPD tridiagonal matrix (diag=4, off=-1).
    fn spd_5x5() -> CsrMatrix<f64> {
        let mut rows = Vec::new();
        let mut cols = Vec::new();
        let mut data = Vec::new();
        for i in 0..5 {
            rows.push(i);
            cols.push(i);
            data.push(4.0);
            if i > 0 {
                rows.push(i);
                cols.push(i - 1);
                data.push(-1.0);
            }
            if i < 4 {
                rows.push(i);
                cols.push(i + 1);
                data.push(-1.0);
            }
        }
        CsrMatrix::new(data, rows, cols, (5, 5)).expect("valid matrix")
    }

    /// Build a strictly diagonally dominant non-symmetric 4x4 matrix.
    fn dd_nonsym_4x4() -> CsrMatrix<f64> {
        // [10  -1   2   0]
        // [-1  11  -1   3]
        // [ 2  -1  10  -1]
        // [ 0   3  -1   8]
        let rows = vec![0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3];
        let cols = vec![0, 1, 2, 0, 1, 2, 3, 0, 1, 2, 3, 1, 2, 3];
        let data = vec![
            10.0, -1.0, 2.0, -1.0, 11.0, -1.0, 3.0, 2.0, -1.0, 10.0, -1.0, 3.0, -1.0, 8.0,
        ];
        CsrMatrix::new(data, rows, cols, (4, 4)).expect("valid matrix")
    }

    fn verify(a: &CsrMatrix<f64>, x: &Array1<f64>, b: &Array1<f64>, tol: f64) {
        let ax = spmv_local(a, x).expect("spmv");
        for (i, (&axi, &bi)) in ax.iter().zip(b.iter()).enumerate() {
            assert!(
                (axi - bi).abs() < tol,
                "Ax[{i}]={axi:.6} != b[{i}]={bi:.6} (diff={:.2e})",
                (axi - bi).abs()
            );
        }
    }

    // --- SOR tests ---

    #[test]
    fn test_sor_spd_3x3() {
        let a = spd_3x3();
        let b = Array1::from_vec(vec![1.0, 2.0, 3.0]);
        let cfg = IterativeSolverConfig {
            max_iter: 5000,
            tol: 1e-8,
            verbose: false,
        };
        let res = sor(&a, &b, 1.0, &cfg).expect("SOR failed");
        assert!(res.converged, "SOR did not converge (residual={})", res.residual_norm);
        verify(&a, &res.solution, &b, 1e-6);
    }

    #[test]
    fn test_sor_spd_5x5() {
        let a = spd_5x5();
        let b = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let cfg = IterativeSolverConfig {
            max_iter: 10000,
            tol: 1e-8,
            verbose: false,
        };
        let res = sor(&a, &b, 1.2, &cfg).expect("SOR failed");
        assert!(res.converged, "SOR 5x5 did not converge");
        verify(&a, &res.solution, &b, 1e-6);
    }

    #[test]
    fn test_sor_dd_nonsym() {
        let a = dd_nonsym_4x4();
        let b = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0]);
        let cfg = IterativeSolverConfig {
            max_iter: 5000,
            tol: 1e-8,
            verbose: false,
        };
        let res = sor(&a, &b, 1.0, &cfg).expect("SOR non-symmetric failed");
        assert!(res.converged, "SOR DD non-sym did not converge");
        verify(&a, &res.solution, &b, 1e-6);
    }

    #[test]
    fn test_sor_invalid_omega() {
        let a = spd_3x3();
        let b = Array1::from_vec(vec![1.0, 2.0, 3.0]);
        let cfg = IterativeSolverConfig::default();
        assert!(sor(&a, &b, 0.0_f64, &cfg).is_err());
        assert!(sor(&a, &b, 2.0_f64, &cfg).is_err());
        assert!(sor(&a, &b, -0.5_f64, &cfg).is_err());
    }

    #[test]
    fn test_sor_dimension_mismatch() {
        let a = spd_3x3();
        let b = Array1::from_vec(vec![1.0, 2.0]);
        let cfg = IterativeSolverConfig::default();
        assert!(sor(&a, &b, 1.0_f64, &cfg).is_err());
    }

    // --- SSOR tests ---

    #[test]
    fn test_ssor_spd_3x3() {
        let a = spd_3x3();
        let b = Array1::from_vec(vec![1.0, 2.0, 3.0]);
        let cfg = IterativeSolverConfig {
            max_iter: 5000,
            tol: 1e-8,
            verbose: false,
        };
        let res = ssor(&a, &b, 1.0, &cfg).expect("SSOR failed");
        assert!(res.converged, "SSOR did not converge (residual={})", res.residual_norm);
        verify(&a, &res.solution, &b, 1e-6);
    }

    #[test]
    fn test_ssor_spd_5x5() {
        let a = spd_5x5();
        let b = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let cfg = IterativeSolverConfig {
            max_iter: 10000,
            tol: 1e-8,
            verbose: false,
        };
        let res = ssor(&a, &b, 1.0, &cfg).expect("SSOR 5x5 failed");
        assert!(res.converged, "SSOR 5x5 did not converge");
        verify(&a, &res.solution, &b, 1e-6);
    }

    #[test]
    fn test_ssor_invalid_omega() {
        let a = spd_3x3();
        let b = Array1::from_vec(vec![1.0, 2.0, 3.0]);
        let cfg = IterativeSolverConfig::default();
        assert!(ssor(&a, &b, 0.0_f64, &cfg).is_err());
        assert!(ssor(&a, &b, 2.0_f64, &cfg).is_err());
    }

    #[test]
    fn test_ssor_vs_sor_convergence() {
        // SSOR (omega=1) should converge faster than SOR (omega=1) for SPD
        let a = spd_5x5();
        let b = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let cfg = IterativeSolverConfig {
            max_iter: 10000,
            tol: 1e-8,
            verbose: false,
        };
        let res_ssor = ssor(&a, &b, 1.0, &cfg).expect("SSOR");
        let res_sor = sor(&a, &b, 1.0, &cfg).expect("SOR");
        // Both should converge
        assert!(res_ssor.converged);
        assert!(res_sor.converged);
        // SSOR should use fewer or equal iterations
        assert!(
            res_ssor.n_iter <= res_sor.n_iter + 10, // allow some slack
            "SSOR used {} iters vs SOR {} iters",
            res_ssor.n_iter,
            res_sor.n_iter
        );
    }

    // --- IDR(s) tests ---

    #[test]
    fn test_idrs_s1_spd_3x3() {
        let a = spd_3x3();
        let b = Array1::from_vec(vec![1.0, 2.0, 3.0]);
        let cfg = IterativeSolverConfig {
            max_iter: 500,
            tol: 1e-8,
            verbose: false,
        };
        let res = idrs(&a, &b, 1, &cfg, None).expect("IDR(1) failed");
        assert!(res.converged, "IDR(1) did not converge (residual={})", res.residual_norm);
        verify(&a, &res.solution, &b, 1e-6);
    }

    #[test]
    fn test_idrs_s4_spd_5x5() {
        let a = spd_5x5();
        let b = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let cfg = IterativeSolverConfig {
            max_iter: 500,
            tol: 1e-8,
            verbose: false,
        };
        let res = idrs(&a, &b, 4, &cfg, None).expect("IDR(4) failed");
        assert!(res.converged, "IDR(4) 5x5 did not converge (residual={})", res.residual_norm);
        verify(&a, &res.solution, &b, 1e-6);
    }

    #[test]
    fn test_idrs_nonsym() {
        let a = dd_nonsym_4x4();
        let b = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0]);
        let cfg = IterativeSolverConfig {
            max_iter: 500,
            tol: 1e-8,
            verbose: false,
        };
        let res = idrs(&a, &b, 2, &cfg, None).expect("IDR(2) nonsym failed");
        assert!(res.converged, "IDR(2) non-sym did not converge");
        verify(&a, &res.solution, &b, 1e-6);
    }

    #[test]
    fn test_idrs_zero_rhs() {
        let a = spd_3x3();
        let b = Array1::zeros(3);
        let cfg = IterativeSolverConfig::default();
        let res = idrs(&a, &b, 2, &cfg, None).expect("IDR(s) zero rhs");
        assert!(res.converged);
        assert!(res.residual_norm <= 1e-14);
    }

    #[test]
    fn test_idrs_zero_s_error() {
        let a = spd_3x3();
        let b = Array1::from_vec(vec![1.0, 2.0, 3.0]);
        let cfg = IterativeSolverConfig::default();
        assert!(idrs(&a, &b, 0, &cfg, None).is_err());
    }

    #[test]
    fn test_idrs_dimension_mismatch() {
        let a = spd_3x3();
        let b = Array1::from_vec(vec![1.0, 2.0]);
        let cfg = IterativeSolverConfig::default();
        assert!(idrs(&a, &b, 2, &cfg, None).is_err());
    }

    #[test]
    fn test_idrs_nonsquare_error() {
        let rows = vec![0, 1];
        let cols = vec![0, 1];
        let data = vec![1.0, 2.0];
        let a =
            CsrMatrix::new(data, rows, cols, (2, 3)).expect("valid nonsquare");
        let b = Array1::from_vec(vec![1.0, 2.0]);
        let cfg = IterativeSolverConfig::default();
        assert!(idrs(&a, &b, 2, &cfg, None).is_err());
    }
}